datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15/// Causal reason explaining why an anomaly was injected.
16///
17/// This enables provenance tracking for understanding the "why" behind each anomaly.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20    /// Injected due to random rate selection.
21    RandomRate {
22        /// Base rate used for selection.
23        base_rate: f64,
24    },
25    /// Injected due to temporal pattern matching.
26    TemporalPattern {
27        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
28        pattern_name: String,
29    },
30    /// Injected based on entity targeting rules.
31    EntityTargeting {
32        /// Type of entity targeted (e.g., "vendor", "user", "account").
33        target_type: String,
34        /// ID of the targeted entity.
35        target_id: String,
36    },
37    /// Part of an anomaly cluster.
38    ClusterMembership {
39        /// ID of the cluster this anomaly belongs to.
40        cluster_id: String,
41    },
42    /// Part of a multi-step scenario.
43    ScenarioStep {
44        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
45        scenario_type: String,
46        /// Step number within the scenario.
47        step_number: u32,
48    },
49    /// Injected based on data quality profile.
50    DataQualityProfile {
51        /// Profile name (e.g., "noisy", "legacy", "clean").
52        profile: String,
53    },
54    /// Injected for ML training balance.
55    MLTrainingBalance {
56        /// Target class being balanced.
57        target_class: String,
58    },
59}
60
61/// Structured injection strategy with captured parameters.
62///
63/// Unlike the string-based `injection_strategy` field, this enum captures
64/// the exact parameters used during injection for full reproducibility.
65#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67    /// Amount was manipulated by a factor.
68    AmountManipulation {
69        /// Original amount before manipulation.
70        original: Decimal,
71        /// Multiplication factor applied.
72        factor: f64,
73    },
74    /// Amount adjusted to avoid a threshold.
75    ThresholdAvoidance {
76        /// Threshold being avoided.
77        threshold: Decimal,
78        /// Final amount after adjustment.
79        adjusted_amount: Decimal,
80    },
81    /// Date was backdated or forward-dated.
82    DateShift {
83        /// Number of days shifted (negative = backdated).
84        days_shifted: i32,
85        /// Original date before shift.
86        original_date: NaiveDate,
87    },
88    /// User approved their own transaction.
89    SelfApproval {
90        /// User who created and approved.
91        user_id: String,
92    },
93    /// Segregation of duties violation.
94    SoDViolation {
95        /// First duty involved.
96        duty1: String,
97        /// Second duty involved.
98        duty2: String,
99        /// User who performed both duties.
100        violating_user: String,
101    },
102    /// Exact duplicate of another document.
103    ExactDuplicate {
104        /// ID of the original document.
105        original_doc_id: String,
106    },
107    /// Near-duplicate with small variations.
108    NearDuplicate {
109        /// ID of the original document.
110        original_doc_id: String,
111        /// Fields that were varied.
112        varied_fields: Vec<String>,
113    },
114    /// Circular flow of funds/goods.
115    CircularFlow {
116        /// Chain of entities involved.
117        entity_chain: Vec<String>,
118    },
119    /// Split transaction to avoid threshold.
120    SplitTransaction {
121        /// Original total amount.
122        original_amount: Decimal,
123        /// Number of splits.
124        split_count: u32,
125        /// IDs of the split documents.
126        split_doc_ids: Vec<String>,
127    },
128    /// Round number manipulation.
129    RoundNumbering {
130        /// Original precise amount.
131        original_amount: Decimal,
132        /// Rounded amount.
133        rounded_amount: Decimal,
134    },
135    /// Timing manipulation (weekend, after-hours, etc.).
136    TimingManipulation {
137        /// Type of timing issue.
138        timing_type: String,
139        /// Original timestamp.
140        original_time: Option<NaiveDateTime>,
141    },
142    /// Account misclassification.
143    AccountMisclassification {
144        /// Correct account.
145        correct_account: String,
146        /// Incorrect account used.
147        incorrect_account: String,
148    },
149    /// Missing required field.
150    MissingField {
151        /// Name of the missing field.
152        field_name: String,
153    },
154    /// Custom injection strategy.
155    Custom {
156        /// Strategy name.
157        name: String,
158        /// Additional parameters.
159        parameters: HashMap<String, String>,
160    },
161}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
239/// Primary anomaly classification.
240#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
241pub enum AnomalyType {
242    /// Fraudulent activity.
243    Fraud(FraudType),
244    /// Data entry or processing error.
245    Error(ErrorType),
246    /// Process or control issue.
247    ProcessIssue(ProcessIssueType),
248    /// Statistical anomaly.
249    Statistical(StatisticalAnomalyType),
250    /// Relational/graph anomaly.
251    Relational(RelationalAnomalyType),
252    /// Custom anomaly type.
253    Custom(String),
254}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
299/// Fraud types for detection training.
300#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
301pub enum FraudType {
302    // Journal Entry Fraud
303    /// Fictitious journal entry with no business purpose.
304    FictitiousEntry,
305    /// Fictitious transaction (alias for FictitiousEntry).
306    FictitiousTransaction,
307    /// Round-dollar amounts suggesting manual manipulation.
308    RoundDollarManipulation,
309    /// Entry posted just below approval threshold.
310    JustBelowThreshold,
311    /// Revenue recognition manipulation.
312    RevenueManipulation,
313    /// Expense capitalization fraud.
314    ImproperCapitalization,
315    /// Improperly capitalizing expenses as assets.
316    ExpenseCapitalization,
317    /// Cookie jar reserves manipulation.
318    ReserveManipulation,
319    /// Round-tripping funds through suspense/clearing accounts.
320    SuspenseAccountAbuse,
321    /// Splitting transactions to stay below approval thresholds.
322    SplitTransaction,
323    /// Unusual timing (weekend, holiday, after-hours postings).
324    TimingAnomaly,
325    /// Posting to unauthorized accounts.
326    UnauthorizedAccess,
327
328    // Approval Fraud
329    /// User approving their own request.
330    SelfApproval,
331    /// Approval beyond authorized limit.
332    ExceededApprovalLimit,
333    /// Segregation of duties violation.
334    SegregationOfDutiesViolation,
335    /// Approval by unauthorized user.
336    UnauthorizedApproval,
337    /// Collusion between approver and requester.
338    CollusiveApproval,
339
340    // Vendor/Payment Fraud
341    /// Fictitious vendor.
342    FictitiousVendor,
343    /// Duplicate payment to vendor.
344    DuplicatePayment,
345    /// Payment to shell company.
346    ShellCompanyPayment,
347    /// Kickback scheme.
348    Kickback,
349    /// Kickback scheme (alias).
350    KickbackScheme,
351    /// Unauthorized customer/vendor discount (sweethearting, side deals).
352    UnauthorizedDiscount,
353    /// Round-tripping funds through multiple entities or accounts to
354    /// inflate apparent activity or obscure origin.
355    RoundTripping,
356    /// Invoice manipulation.
357    InvoiceManipulation,
358
359    // Asset Fraud
360    /// Misappropriation of assets.
361    AssetMisappropriation,
362    /// Inventory theft.
363    InventoryTheft,
364    /// Ghost employee.
365    GhostEmployee,
366
367    // Financial Statement Fraud
368    /// Premature revenue recognition.
369    PrematureRevenue,
370    /// Understated liabilities.
371    UnderstatedLiabilities,
372    /// Overstated assets.
373    OverstatedAssets,
374    /// Channel stuffing.
375    ChannelStuffing,
376
377    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
378    /// Improper revenue recognition timing (ASC 606/IFRS 15).
379    ImproperRevenueRecognition,
380    /// Multiple performance obligations not properly separated.
381    ImproperPoAllocation,
382    /// Variable consideration not properly estimated.
383    VariableConsiderationManipulation,
384    /// Contract modifications not properly accounted for.
385    ContractModificationMisstatement,
386
387    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
388    /// Lease classification manipulation (operating vs finance).
389    LeaseClassificationManipulation,
390    /// Off-balance sheet lease fraud.
391    OffBalanceSheetLease,
392    /// Lease liability understatement.
393    LeaseLiabilityUnderstatement,
394    /// ROU asset misstatement.
395    RouAssetMisstatement,
396
397    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
398    /// Fair value hierarchy misclassification.
399    FairValueHierarchyManipulation,
400    /// Level 3 input manipulation.
401    Level3InputManipulation,
402    /// Valuation technique manipulation.
403    ValuationTechniqueManipulation,
404
405    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
406    /// Delayed impairment recognition.
407    DelayedImpairment,
408    /// Improperly avoiding impairment testing.
409    ImpairmentTestAvoidance,
410    /// Cash flow projection manipulation for impairment.
411    CashFlowProjectionManipulation,
412    /// Improper impairment reversal (IFRS only).
413    ImproperImpairmentReversal,
414
415    // Sourcing/Procurement Fraud (S2C)
416    /// Bid rigging or collusion among bidders.
417    BidRigging,
418    /// Contracts with phantom/shell vendors.
419    PhantomVendorContract,
420    /// Splitting contracts to avoid approval thresholds.
421    SplitContractThreshold,
422    /// Conflict of interest in sourcing decisions.
423    ConflictOfInterestSourcing,
424
425    // HR/Payroll Fraud (H2R)
426    /// Ghost employee on payroll.
427    GhostEmployeePayroll,
428    /// Payroll inflation/unauthorized raises.
429    PayrollInflation,
430    /// Duplicate expense report submission.
431    DuplicateExpenseReport,
432    /// Fictitious expense claims.
433    FictitiousExpense,
434    /// Splitting expenses to avoid approval threshold.
435    SplitExpenseToAvoidApproval,
436
437    // O2C Fraud
438    /// Revenue timing manipulation via quotes.
439    RevenueTimingManipulation,
440    /// Overriding quote prices without authorization.
441    QuotePriceOverride,
442}
443
444impl FraudType {
445    /// Returns severity level (1-5).
446    pub fn severity(&self) -> u8 {
447        match self {
448            FraudType::RoundDollarManipulation => 2,
449            FraudType::JustBelowThreshold => 3,
450            FraudType::SelfApproval => 3,
451            FraudType::ExceededApprovalLimit => 3,
452            FraudType::DuplicatePayment => 3,
453            FraudType::FictitiousEntry => 4,
454            FraudType::RevenueManipulation => 5,
455            FraudType::FictitiousVendor => 5,
456            FraudType::ShellCompanyPayment => 5,
457            FraudType::AssetMisappropriation => 5,
458            FraudType::SegregationOfDutiesViolation => 4,
459            FraudType::CollusiveApproval => 5,
460            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
461            FraudType::ImproperRevenueRecognition => 5,
462            FraudType::ImproperPoAllocation => 4,
463            FraudType::VariableConsiderationManipulation => 4,
464            FraudType::ContractModificationMisstatement => 3,
465            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
466            FraudType::LeaseClassificationManipulation => 4,
467            FraudType::OffBalanceSheetLease => 5,
468            FraudType::LeaseLiabilityUnderstatement => 4,
469            FraudType::RouAssetMisstatement => 3,
470            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
471            FraudType::FairValueHierarchyManipulation => 4,
472            FraudType::Level3InputManipulation => 5,
473            FraudType::ValuationTechniqueManipulation => 4,
474            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
475            FraudType::DelayedImpairment => 4,
476            FraudType::ImpairmentTestAvoidance => 4,
477            FraudType::CashFlowProjectionManipulation => 5,
478            FraudType::ImproperImpairmentReversal => 3,
479            _ => 4,
480        }
481    }
482}
483
484/// Error types for error detection.
485#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
486pub enum ErrorType {
487    // Data Entry Errors
488    /// Duplicate document entry.
489    DuplicateEntry,
490    /// Reversed debit/credit amounts.
491    ReversedAmount,
492    /// Transposed digits in amount.
493    TransposedDigits,
494    /// Wrong decimal placement.
495    DecimalError,
496    /// Missing required field.
497    MissingField,
498    /// Invalid account code.
499    InvalidAccount,
500
501    // Timing Errors
502    /// Posted to wrong period.
503    WrongPeriod,
504    /// Backdated entry.
505    BackdatedEntry,
506    /// Future-dated entry.
507    FutureDatedEntry,
508    /// Cutoff error.
509    CutoffError,
510
511    // Classification Errors
512    /// Wrong account classification.
513    MisclassifiedAccount,
514    /// Wrong cost center.
515    WrongCostCenter,
516    /// Wrong company code.
517    WrongCompanyCode,
518
519    // Calculation Errors
520    /// Unbalanced journal entry.
521    UnbalancedEntry,
522    /// Rounding error.
523    RoundingError,
524    /// Currency conversion error.
525    CurrencyError,
526    /// Tax calculation error.
527    TaxCalculationError,
528
529    // Accounting Standards Errors (Non-Fraudulent)
530    /// Wrong revenue recognition timing (honest mistake).
531    RevenueTimingError,
532    /// Performance obligation allocation error.
533    PoAllocationError,
534    /// Lease classification error (operating vs finance).
535    LeaseClassificationError,
536    /// Lease calculation error (PV, amortization).
537    LeaseCalculationError,
538    /// Fair value measurement error.
539    FairValueError,
540    /// Impairment calculation error.
541    ImpairmentCalculationError,
542    /// Discount rate error.
543    DiscountRateError,
544    /// Framework application error (IFRS vs GAAP).
545    FrameworkApplicationError,
546}
547
548impl ErrorType {
549    /// Returns severity level (1-5).
550    pub fn severity(&self) -> u8 {
551        match self {
552            ErrorType::RoundingError => 1,
553            ErrorType::MissingField => 2,
554            ErrorType::TransposedDigits => 2,
555            ErrorType::DecimalError => 3,
556            ErrorType::DuplicateEntry => 3,
557            ErrorType::ReversedAmount => 3,
558            ErrorType::WrongPeriod => 4,
559            ErrorType::UnbalancedEntry => 5,
560            ErrorType::CurrencyError => 4,
561            // Accounting Standards Errors
562            ErrorType::RevenueTimingError => 4,
563            ErrorType::PoAllocationError => 3,
564            ErrorType::LeaseClassificationError => 3,
565            ErrorType::LeaseCalculationError => 3,
566            ErrorType::FairValueError => 4,
567            ErrorType::ImpairmentCalculationError => 4,
568            ErrorType::DiscountRateError => 3,
569            ErrorType::FrameworkApplicationError => 4,
570            _ => 3,
571        }
572    }
573}
574
575/// Process issue types.
576#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
577pub enum ProcessIssueType {
578    // Approval Issues
579    /// Approval skipped entirely.
580    SkippedApproval,
581    /// Late approval (after posting).
582    LateApproval,
583    /// Missing supporting documentation.
584    MissingDocumentation,
585    /// Incomplete approval chain.
586    IncompleteApprovalChain,
587
588    // Timing Issues
589    /// Late posting.
590    LatePosting,
591    /// Posting outside business hours.
592    AfterHoursPosting,
593    /// Weekend/holiday posting.
594    WeekendPosting,
595    /// Rushed period-end posting.
596    RushedPeriodEnd,
597    /// Entry posted after the period-end close date (ISA 240.32).
598    /// Distinct from `RushedPeriodEnd` which flags pre-close volume spikes —
599    /// this variant specifically marks post-close adjustments.
600    PostClosePosting,
601
602    // Control Issues
603    /// Manual override of system control.
604    ManualOverride,
605    /// Unusual user access pattern.
606    UnusualAccess,
607    /// System bypass.
608    SystemBypass,
609    /// Batch processing anomaly.
610    BatchAnomaly,
611
612    // Documentation Issues
613    /// Vague or missing description.
614    VagueDescription,
615    /// Changed after posting.
616    PostFactoChange,
617    /// Incomplete audit trail.
618    IncompleteAuditTrail,
619
620    // Sourcing/Procurement Issues (S2C)
621    /// Purchasing outside of contracts (maverick spend).
622    MaverickSpend,
623    /// Purchasing against an expired contract.
624    ExpiredContractPurchase,
625    /// Overriding contracted price without authorization.
626    ContractPriceOverride,
627    /// Award given with only a single bid received.
628    SingleBidAward,
629    /// Bypassing supplier qualification requirements.
630    QualificationBypass,
631
632    // O2C Issues
633    /// Converting an expired quote to a sales order.
634    ExpiredQuoteConversion,
635}
636
637impl ProcessIssueType {
638    /// Returns severity level (1-5).
639    pub fn severity(&self) -> u8 {
640        match self {
641            ProcessIssueType::VagueDescription => 1,
642            ProcessIssueType::LatePosting => 2,
643            ProcessIssueType::AfterHoursPosting => 2,
644            ProcessIssueType::WeekendPosting => 2,
645            ProcessIssueType::PostClosePosting => 4,
646            ProcessIssueType::SkippedApproval => 4,
647            ProcessIssueType::ManualOverride => 4,
648            ProcessIssueType::SystemBypass => 5,
649            ProcessIssueType::IncompleteAuditTrail => 4,
650            _ => 3,
651        }
652    }
653}
654
655/// Statistical anomaly types.
656#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
657pub enum StatisticalAnomalyType {
658    // Amount Anomalies
659    /// Amount significantly above normal.
660    UnusuallyHighAmount,
661    /// Amount significantly below normal.
662    UnusuallyLowAmount,
663    /// Violates Benford's Law distribution.
664    BenfordViolation,
665    /// Exact duplicate amount (suspicious).
666    ExactDuplicateAmount,
667    /// Repeating pattern in amounts.
668    RepeatingAmount,
669
670    // Frequency Anomalies
671    /// Unusual transaction frequency.
672    UnusualFrequency,
673    /// Burst of transactions.
674    TransactionBurst,
675    /// Unusual time of day.
676    UnusualTiming,
677
678    // Trend Anomalies
679    /// Break in historical trend.
680    TrendBreak,
681    /// Sudden level shift.
682    LevelShift,
683    /// Seasonal pattern violation.
684    SeasonalAnomaly,
685
686    // Distribution Anomalies
687    /// Outlier in distribution.
688    StatisticalOutlier,
689    /// Change in variance.
690    VarianceChange,
691    /// Distribution shift.
692    DistributionShift,
693
694    // Sourcing/Contract Anomalies
695    /// Pattern of SLA breaches from a vendor.
696    SlaBreachPattern,
697    /// Contract with zero utilization.
698    UnusedContract,
699
700    // HR/Payroll Anomalies
701    /// Anomalous overtime patterns.
702    OvertimeAnomaly,
703}
704
705impl StatisticalAnomalyType {
706    /// Returns severity level (1-5).
707    pub fn severity(&self) -> u8 {
708        match self {
709            StatisticalAnomalyType::UnusualTiming => 1,
710            StatisticalAnomalyType::UnusualFrequency => 2,
711            StatisticalAnomalyType::BenfordViolation => 2,
712            StatisticalAnomalyType::UnusuallyHighAmount => 3,
713            StatisticalAnomalyType::TrendBreak => 3,
714            StatisticalAnomalyType::TransactionBurst => 4,
715            StatisticalAnomalyType::ExactDuplicateAmount => 3,
716            _ => 3,
717        }
718    }
719}
720
721/// Relational/graph anomaly types.
722#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
723pub enum RelationalAnomalyType {
724    // Transaction Pattern Anomalies
725    /// Circular transaction pattern.
726    CircularTransaction,
727    /// Unusual account combination.
728    UnusualAccountPair,
729    /// New trading partner.
730    NewCounterparty,
731    /// Dormant account suddenly active.
732    DormantAccountActivity,
733
734    // Network Anomalies
735    /// Unusual network centrality.
736    CentralityAnomaly,
737    /// Isolated transaction cluster.
738    IsolatedCluster,
739    /// Bridge node anomaly.
740    BridgeNodeAnomaly,
741    /// Community structure change.
742    CommunityAnomaly,
743
744    // Relationship Anomalies
745    /// Missing expected relationship.
746    MissingRelationship,
747    /// Unexpected relationship.
748    UnexpectedRelationship,
749    /// Relationship strength change.
750    RelationshipStrengthChange,
751
752    // Intercompany Anomalies
753    /// Unmatched intercompany transaction.
754    UnmatchedIntercompany,
755    /// Circular intercompany flow.
756    CircularIntercompany,
757    /// Transfer pricing anomaly.
758    TransferPricingAnomaly,
759
760    // Source-conditional anomalies (SOTA-12, #140)
761    /// JE uses a `(source, account-pair)` combination that is rare under the
762    /// per-source marginal P(account | source) — the single dominant explainer
763    /// for audit-packet top JEs (FINDINGS §13). Selected by the orchestrator's
764    /// anomaly-injection post-process from the generated JE set.
765    SourceConditionalRarity,
766}
767
768impl RelationalAnomalyType {
769    /// Returns severity level (1-5).
770    pub fn severity(&self) -> u8 {
771        match self {
772            RelationalAnomalyType::NewCounterparty => 1,
773            RelationalAnomalyType::DormantAccountActivity => 2,
774            RelationalAnomalyType::UnusualAccountPair => 2,
775            RelationalAnomalyType::CircularTransaction => 4,
776            RelationalAnomalyType::CircularIntercompany => 4,
777            RelationalAnomalyType::TransferPricingAnomaly => 4,
778            RelationalAnomalyType::UnmatchedIntercompany => 3,
779            RelationalAnomalyType::SourceConditionalRarity => 2,
780            _ => 3,
781        }
782    }
783}
784
785/// A labeled anomaly for supervised learning.
786#[derive(Debug, Clone, Serialize, Deserialize)]
787pub struct LabeledAnomaly {
788    /// Unique anomaly identifier.
789    pub anomaly_id: String,
790    /// Type of anomaly.
791    pub anomaly_type: AnomalyType,
792    /// Document or entity that contains the anomaly.
793    pub document_id: String,
794    /// Document type (JE, PO, Invoice, etc.).
795    pub document_type: String,
796    /// Company code.
797    pub company_code: String,
798    /// Date the anomaly occurred.
799    pub anomaly_date: NaiveDate,
800    /// Timestamp when detected/injected.
801    #[serde(with = "crate::serde_timestamp::naive")]
802    pub detection_timestamp: NaiveDateTime,
803    /// Confidence score (0.0 - 1.0) for injected anomalies.
804    pub confidence: f64,
805    /// Severity (1-5).
806    pub severity: u8,
807    /// Description of the anomaly.
808    pub description: String,
809    /// Related entities (user IDs, account codes, etc.).
810    pub related_entities: Vec<String>,
811    /// Monetary impact if applicable.
812    pub monetary_impact: Option<Decimal>,
813    /// Additional metadata.
814    pub metadata: HashMap<String, String>,
815    /// Whether this was injected (true) or naturally occurring (false).
816    pub is_injected: bool,
817    /// Injection strategy used (if injected) - legacy string field.
818    pub injection_strategy: Option<String>,
819    /// Cluster ID if part of an anomaly cluster.
820    pub cluster_id: Option<String>,
821
822    // ========================================
823    // PROVENANCE TRACKING FIELDS (Phase 1.2)
824    // ========================================
825    /// Hash of the original document before modification.
826    /// Enables tracking what the document looked like pre-injection.
827    #[serde(default, skip_serializing_if = "Option::is_none")]
828    pub original_document_hash: Option<String>,
829
830    /// Causal reason explaining why this anomaly was injected.
831    /// Provides "why" tracking for each anomaly.
832    #[serde(default, skip_serializing_if = "Option::is_none")]
833    pub causal_reason: Option<AnomalyCausalReason>,
834
835    /// Structured injection strategy with parameters.
836    /// More detailed than the legacy string-based injection_strategy field.
837    #[serde(default, skip_serializing_if = "Option::is_none")]
838    pub structured_strategy: Option<InjectionStrategy>,
839
840    /// Parent anomaly ID if this was derived from another anomaly.
841    /// Enables anomaly transformation chains.
842    #[serde(default, skip_serializing_if = "Option::is_none")]
843    pub parent_anomaly_id: Option<String>,
844
845    /// Child anomaly IDs that were derived from this anomaly.
846    #[serde(default, skip_serializing_if = "Vec::is_empty")]
847    pub child_anomaly_ids: Vec<String>,
848
849    /// Scenario ID if this anomaly is part of a multi-step scenario.
850    #[serde(default, skip_serializing_if = "Option::is_none")]
851    pub scenario_id: Option<String>,
852
853    /// Generation run ID that produced this anomaly.
854    /// Enables tracing anomalies back to their generation run.
855    #[serde(default, skip_serializing_if = "Option::is_none")]
856    pub run_id: Option<String>,
857
858    /// Seed used for RNG during generation.
859    /// Enables reproducibility.
860    #[serde(default, skip_serializing_if = "Option::is_none")]
861    pub generation_seed: Option<u64>,
862}
863
864impl LabeledAnomaly {
865    /// Creates a new labeled anomaly.
866    pub fn new(
867        anomaly_id: String,
868        anomaly_type: AnomalyType,
869        document_id: String,
870        document_type: String,
871        company_code: String,
872        anomaly_date: NaiveDate,
873    ) -> Self {
874        let severity = anomaly_type.severity();
875        let description = format!(
876            "{} - {} in document {}",
877            anomaly_type.category(),
878            anomaly_type.type_name(),
879            document_id
880        );
881
882        Self {
883            anomaly_id,
884            anomaly_type,
885            document_id,
886            document_type,
887            company_code,
888            anomaly_date,
889            detection_timestamp: chrono::Local::now().naive_local(),
890            confidence: 1.0,
891            severity,
892            description,
893            related_entities: Vec::new(),
894            monetary_impact: None,
895            metadata: HashMap::new(),
896            is_injected: true,
897            injection_strategy: None,
898            cluster_id: None,
899            // Provenance fields
900            original_document_hash: None,
901            causal_reason: None,
902            structured_strategy: None,
903            parent_anomaly_id: None,
904            child_anomaly_ids: Vec::new(),
905            scenario_id: None,
906            run_id: None,
907            generation_seed: None,
908        }
909    }
910
911    /// Sets the description.
912    pub fn with_description(mut self, description: &str) -> Self {
913        self.description = description.to_string();
914        self
915    }
916
917    /// Sets the monetary impact.
918    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
919        self.monetary_impact = Some(impact);
920        self
921    }
922
923    /// Adds a related entity.
924    pub fn with_related_entity(mut self, entity: &str) -> Self {
925        self.related_entities.push(entity.to_string());
926        self
927    }
928
929    /// Adds metadata.
930    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
931        self.metadata.insert(key.to_string(), value.to_string());
932        self
933    }
934
935    /// Sets the injection strategy (legacy string).
936    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
937        self.injection_strategy = Some(strategy.to_string());
938        self
939    }
940
941    /// Sets the cluster ID.
942    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
943        self.cluster_id = Some(cluster_id.to_string());
944        self
945    }
946
947    // ========================================
948    // PROVENANCE BUILDER METHODS (Phase 1.2)
949    // ========================================
950
951    /// Sets the original document hash for provenance tracking.
952    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
953        self.original_document_hash = Some(hash.to_string());
954        self
955    }
956
957    /// Sets the causal reason for this anomaly.
958    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
959        self.causal_reason = Some(reason);
960        self
961    }
962
963    /// Sets the structured injection strategy.
964    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
965        // Also set the legacy string field for backward compatibility
966        self.injection_strategy = Some(strategy.strategy_type().to_string());
967        self.structured_strategy = Some(strategy);
968        self
969    }
970
971    /// Sets the parent anomaly ID (for anomaly derivation chains).
972    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
973        self.parent_anomaly_id = Some(parent_id.to_string());
974        self
975    }
976
977    /// Adds a child anomaly ID.
978    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
979        self.child_anomaly_ids.push(child_id.to_string());
980        self
981    }
982
983    /// Sets the scenario ID for multi-step scenario tracking.
984    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
985        self.scenario_id = Some(scenario_id.to_string());
986        self
987    }
988
989    /// Sets the generation run ID.
990    pub fn with_run_id(mut self, run_id: &str) -> Self {
991        self.run_id = Some(run_id.to_string());
992        self
993    }
994
995    /// Sets the generation seed for reproducibility.
996    pub fn with_generation_seed(mut self, seed: u64) -> Self {
997        self.generation_seed = Some(seed);
998        self
999    }
1000
1001    /// Sets multiple provenance fields at once for convenience.
1002    pub fn with_provenance(
1003        mut self,
1004        run_id: Option<&str>,
1005        seed: Option<u64>,
1006        causal_reason: Option<AnomalyCausalReason>,
1007    ) -> Self {
1008        if let Some(id) = run_id {
1009            self.run_id = Some(id.to_string());
1010        }
1011        self.generation_seed = seed;
1012        self.causal_reason = causal_reason;
1013        self
1014    }
1015
1016    /// Converts to a feature vector for ML.
1017    ///
1018    /// Returns a vector of 15 features:
1019    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1020    /// - 1 feature: Severity (normalized 0-1)
1021    /// - 1 feature: Confidence
1022    /// - 1 feature: Has monetary impact (0/1)
1023    /// - 1 feature: Monetary impact (log-scaled)
1024    /// - 1 feature: Is intentional (0/1)
1025    /// - 1 feature: Number of related entities
1026    /// - 1 feature: Is part of cluster (0/1)
1027    /// - 1 feature: Is part of scenario (0/1)
1028    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1029    pub fn to_features(&self) -> Vec<f64> {
1030        let mut features = Vec::new();
1031
1032        // Category one-hot encoding
1033        let categories = [
1034            "Fraud",
1035            "Error",
1036            "ProcessIssue",
1037            "Statistical",
1038            "Relational",
1039            "Custom",
1040        ];
1041        for cat in &categories {
1042            features.push(if self.anomaly_type.category() == *cat {
1043                1.0
1044            } else {
1045                0.0
1046            });
1047        }
1048
1049        // Severity (normalized)
1050        features.push(self.severity as f64 / 5.0);
1051
1052        // Confidence
1053        features.push(self.confidence);
1054
1055        // Has monetary impact
1056        features.push(if self.monetary_impact.is_some() {
1057            1.0
1058        } else {
1059            0.0
1060        });
1061
1062        // Monetary impact (log-scaled)
1063        if let Some(impact) = self.monetary_impact {
1064            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1065            features.push((impact_f64.abs() + 1.0).ln());
1066        } else {
1067            features.push(0.0);
1068        }
1069
1070        // Is intentional
1071        features.push(if self.anomaly_type.is_intentional() {
1072            1.0
1073        } else {
1074            0.0
1075        });
1076
1077        // Number of related entities
1078        features.push(self.related_entities.len() as f64);
1079
1080        // Is part of cluster
1081        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1082
1083        // Provenance features
1084        // Is part of scenario
1085        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1086
1087        // Has parent anomaly (indicates this is a derived anomaly)
1088        features.push(if self.parent_anomaly_id.is_some() {
1089            1.0
1090        } else {
1091            0.0
1092        });
1093
1094        features
1095    }
1096
1097    /// Returns the number of features in the feature vector.
1098    pub fn feature_count() -> usize {
1099        15 // 6 category + 9 other features
1100    }
1101
1102    /// Returns feature names for documentation/ML metadata.
1103    pub fn feature_names() -> Vec<&'static str> {
1104        vec![
1105            "category_fraud",
1106            "category_error",
1107            "category_process_issue",
1108            "category_statistical",
1109            "category_relational",
1110            "category_custom",
1111            "severity_normalized",
1112            "confidence",
1113            "has_monetary_impact",
1114            "monetary_impact_log",
1115            "is_intentional",
1116            "related_entity_count",
1117            "is_clustered",
1118            "is_scenario_part",
1119            "is_derived",
1120        ]
1121    }
1122}
1123
/// Summary of anomalies for reporting.
///
/// Aggregates counts, monetary impact, date span and cluster count over a set
/// of labeled anomalies. Built by `AnomalySummary::from_anomalies`; the derived
/// `Default` provides the starting value that function fills in.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Total anomaly count.
    pub total_count: usize,
    /// Count by category, keyed by the anomaly type's category string.
    pub by_category: HashMap<String, usize>,
    /// Count by specific type, keyed by the anomaly type's type name.
    pub by_type: HashMap<String, usize>,
    /// Count by severity, keyed by the anomaly's numeric severity.
    pub by_severity: HashMap<u8, usize>,
    /// Count by company, keyed by company code.
    pub by_company: HashMap<String, usize>,
    /// Total monetary impact, summed over anomalies that carry one.
    pub total_monetary_impact: Decimal,
    /// Date range as `(earliest, latest)` anomaly date; `None` when no anomalies were summarized.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster IDs seen.
    pub cluster_count: usize,
}
1144
1145impl AnomalySummary {
1146    /// Creates a summary from a list of anomalies.
1147    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1148        let mut summary = AnomalySummary {
1149            total_count: anomalies.len(),
1150            ..Default::default()
1151        };
1152
1153        let mut min_date: Option<NaiveDate> = None;
1154        let mut max_date: Option<NaiveDate> = None;
1155        let mut clusters = std::collections::HashSet::new();
1156
1157        for anomaly in anomalies {
1158            // By category
1159            *summary
1160                .by_category
1161                .entry(anomaly.anomaly_type.category().to_string())
1162                .or_insert(0) += 1;
1163
1164            // By type
1165            *summary
1166                .by_type
1167                .entry(anomaly.anomaly_type.type_name())
1168                .or_insert(0) += 1;
1169
1170            // By severity
1171            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1172
1173            // By company
1174            *summary
1175                .by_company
1176                .entry(anomaly.company_code.clone())
1177                .or_insert(0) += 1;
1178
1179            // Monetary impact
1180            if let Some(impact) = anomaly.monetary_impact {
1181                summary.total_monetary_impact += impact;
1182            }
1183
1184            // Date range
1185            match min_date {
1186                None => min_date = Some(anomaly.anomaly_date),
1187                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1188                _ => {}
1189            }
1190            match max_date {
1191                None => max_date = Some(anomaly.anomaly_date),
1192                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1193                _ => {}
1194            }
1195
1196            // Clusters
1197            if let Some(cluster_id) = &anomaly.cluster_id {
1198                clusters.insert(cluster_id.clone());
1199            }
1200        }
1201
1202        summary.date_range = min_date.zip(max_date);
1203        summary.cluster_count = clusters.len();
1204
1205        summary
1206    }
1207}
1208
1209// ============================================================================
1210// ENHANCED ANOMALY TAXONOMY (FR-003)
1211// ============================================================================
1212
/// High-level anomaly category for multi-class classification.
///
/// These categories provide a more granular classification than the base
/// AnomalyType enum, enabling better ML model training and audit reporting.
///
/// There are 14 fixed variants plus `Custom`, which carries a user-supplied
/// name.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related anomalies
    /// Fictitious or shell vendor.
    FictitiousVendor,
    /// Kickback or collusion with vendor.
    VendorKickback,
    /// Related party vendor transactions.
    RelatedPartyVendor,

    // Transaction-related anomalies
    /// Duplicate payment or invoice.
    DuplicatePayment,
    /// Unauthorized transaction.
    UnauthorizedTransaction,
    /// Structured transactions to avoid thresholds.
    StructuredTransaction,

    // Pattern-based anomalies
    /// Circular flow of funds.
    CircularFlow,
    /// Behavioral anomaly (deviation from normal patterns).
    BehavioralAnomaly,
    /// Timing-based anomaly.
    TimingAnomaly,

    // Journal entry anomalies
    /// Manual journal entry anomaly.
    JournalAnomaly,
    /// Manual override of controls.
    ManualOverride,
    /// Missing approval in chain.
    MissingApproval,

    // Statistical anomalies
    /// Statistical outlier.
    StatisticalOutlier,
    /// Distribution anomaly (Benford, etc.).
    DistributionAnomaly,

    // Custom category
    /// User-defined category; the string is the category's name.
    Custom(String),
}
1261
1262impl AnomalyCategory {
1263    /// Derives an AnomalyCategory from an AnomalyType.
1264    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1265        match anomaly_type {
1266            AnomalyType::Fraud(fraud_type) => match fraud_type {
1267                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1268                    AnomalyCategory::FictitiousVendor
1269                }
1270                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1271                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1272                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1273                    AnomalyCategory::StructuredTransaction
1274                }
1275                FraudType::SelfApproval
1276                | FraudType::UnauthorizedApproval
1277                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1278                FraudType::TimingAnomaly
1279                | FraudType::RoundDollarManipulation
1280                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1281                _ => AnomalyCategory::BehavioralAnomaly,
1282            },
1283            AnomalyType::Error(error_type) => match error_type {
1284                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1285                ErrorType::WrongPeriod
1286                | ErrorType::BackdatedEntry
1287                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1288                _ => AnomalyCategory::JournalAnomaly,
1289            },
1290            AnomalyType::ProcessIssue(process_type) => match process_type {
1291                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1292                    AnomalyCategory::MissingApproval
1293                }
1294                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1295                    AnomalyCategory::ManualOverride
1296                }
1297                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1298                    AnomalyCategory::TimingAnomaly
1299                }
1300                _ => AnomalyCategory::BehavioralAnomaly,
1301            },
1302            AnomalyType::Statistical(stat_type) => match stat_type {
1303                StatisticalAnomalyType::BenfordViolation
1304                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1305                _ => AnomalyCategory::StatisticalOutlier,
1306            },
1307            AnomalyType::Relational(rel_type) => match rel_type {
1308                RelationalAnomalyType::CircularTransaction
1309                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1310                _ => AnomalyCategory::BehavioralAnomaly,
1311            },
1312            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1313        }
1314    }
1315
1316    /// Returns the category name as a string.
1317    pub fn name(&self) -> &str {
1318        match self {
1319            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1320            AnomalyCategory::VendorKickback => "vendor_kickback",
1321            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1322            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1323            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1324            AnomalyCategory::StructuredTransaction => "structured_transaction",
1325            AnomalyCategory::CircularFlow => "circular_flow",
1326            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1327            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1328            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1329            AnomalyCategory::ManualOverride => "manual_override",
1330            AnomalyCategory::MissingApproval => "missing_approval",
1331            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1332            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1333            AnomalyCategory::Custom(s) => s.as_str(),
1334        }
1335    }
1336
1337    /// Returns the ordinal value for ML encoding.
1338    pub fn ordinal(&self) -> u8 {
1339        match self {
1340            AnomalyCategory::FictitiousVendor => 0,
1341            AnomalyCategory::VendorKickback => 1,
1342            AnomalyCategory::RelatedPartyVendor => 2,
1343            AnomalyCategory::DuplicatePayment => 3,
1344            AnomalyCategory::UnauthorizedTransaction => 4,
1345            AnomalyCategory::StructuredTransaction => 5,
1346            AnomalyCategory::CircularFlow => 6,
1347            AnomalyCategory::BehavioralAnomaly => 7,
1348            AnomalyCategory::TimingAnomaly => 8,
1349            AnomalyCategory::JournalAnomaly => 9,
1350            AnomalyCategory::ManualOverride => 10,
1351            AnomalyCategory::MissingApproval => 11,
1352            AnomalyCategory::StatisticalOutlier => 12,
1353            AnomalyCategory::DistributionAnomaly => 13,
1354            AnomalyCategory::Custom(_) => 14,
1355        }
1356    }
1357
1358    /// Returns the total number of categories (excluding Custom).
1359    pub fn category_count() -> usize {
1360        15 // 14 fixed categories + Custom
1361    }
1362}
1363
/// Type of contributing factor for anomaly confidence/severity calculation.
///
/// A plain, field-less tag (it derives `Copy`); unlike `AnomalyCategory`,
/// the `Custom` variant carries no name of its own.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Amount deviation from expected value.
    AmountDeviation,
    /// Proximity to approval/reporting threshold.
    ThresholdProximity,
    /// Timing-related anomaly indicator.
    TimingAnomaly,
    /// Entity risk score contribution.
    EntityRisk,
    /// Pattern match confidence.
    PatternMatch,
    /// Frequency deviation from normal.
    FrequencyDeviation,
    /// Relationship-based anomaly indicator.
    RelationshipAnomaly,
    /// Control bypass indicator.
    ControlBypass,
    /// Benford's Law violation.
    BenfordViolation,
    /// Duplicate indicator.
    DuplicateIndicator,
    /// Approval chain issue.
    ApprovalChainIssue,
    /// Documentation gap.
    DocumentationGap,
    /// Custom factor type.
    Custom,
}
1394
1395impl FactorType {
1396    /// Returns the factor type name.
1397    pub fn name(&self) -> &'static str {
1398        match self {
1399            FactorType::AmountDeviation => "amount_deviation",
1400            FactorType::ThresholdProximity => "threshold_proximity",
1401            FactorType::TimingAnomaly => "timing_anomaly",
1402            FactorType::EntityRisk => "entity_risk",
1403            FactorType::PatternMatch => "pattern_match",
1404            FactorType::FrequencyDeviation => "frequency_deviation",
1405            FactorType::RelationshipAnomaly => "relationship_anomaly",
1406            FactorType::ControlBypass => "control_bypass",
1407            FactorType::BenfordViolation => "benford_violation",
1408            FactorType::DuplicateIndicator => "duplicate_indicator",
1409            FactorType::ApprovalChainIssue => "approval_chain_issue",
1410            FactorType::DocumentationGap => "documentation_gap",
1411            FactorType::Custom => "custom",
1412        }
1413    }
1414}
1415
/// Evidence supporting a contributing factor.
///
/// Attached to a `ContributingFactor` through its `with_evidence` builder.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
    pub source: String,
    /// Raw evidence data as free-form string key/value pairs.
    pub data: HashMap<String, String>,
}
1424
/// A contributing factor to anomaly confidence/severity.
///
/// Pairs an observed `value` with a `threshold`; `contribution()` turns the
/// distance between the two into a weighted score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Type of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold or expected value.
    pub threshold: f64,
    /// Direction of comparison: `true` means a value above the threshold is
    /// anomalous, `false` means a value below the threshold is anomalous.
    pub direction_greater: bool,
    /// Weight of this factor in overall calculation (0.0 - 1.0).
    pub weight: f64,
    /// Human-readable description.
    pub description: String,
    /// Optional supporting evidence.
    pub evidence: Option<FactorEvidence>,
}
1443
1444impl ContributingFactor {
1445    /// Creates a new contributing factor.
1446    pub fn new(
1447        factor_type: FactorType,
1448        value: f64,
1449        threshold: f64,
1450        direction_greater: bool,
1451        weight: f64,
1452        description: &str,
1453    ) -> Self {
1454        Self {
1455            factor_type,
1456            value,
1457            threshold,
1458            direction_greater,
1459            weight,
1460            description: description.to_string(),
1461            evidence: None,
1462        }
1463    }
1464
1465    /// Adds evidence to the factor.
1466    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1467        self.evidence = Some(FactorEvidence {
1468            source: source.to_string(),
1469            data,
1470        });
1471        self
1472    }
1473
1474    /// Calculates the factor's contribution to anomaly score.
1475    pub fn contribution(&self) -> f64 {
1476        let deviation = if self.direction_greater {
1477            (self.value - self.threshold).max(0.0)
1478        } else {
1479            (self.threshold - self.value).max(0.0)
1480        };
1481
1482        // Normalize by threshold to get relative deviation
1483        let relative_deviation = if self.threshold.abs() > 0.001 {
1484            deviation / self.threshold.abs()
1485        } else {
1486            deviation
1487        };
1488
1489        // Apply weight and cap at 1.0
1490        (relative_deviation * self.weight).min(1.0)
1491    }
1492}
1493
/// Enhanced anomaly label with dynamic confidence and severity.
///
/// Wraps a `LabeledAnomaly` and layers a finer-grained category, continuous
/// confidence/severity scores and the factors behind them on top of it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// Base labeled anomaly (backward compatible).
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Dynamically calculated confidence (0.0 - 1.0).
    pub enhanced_confidence: f64,
    /// Contextually calculated severity (0.0 - 1.0).
    pub enhanced_severity: f64,
    /// Factors contributing to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Secondary categories (for multi-label classification).
    pub secondary_categories: Vec<AnomalyCategory>,
}
1510
1511impl EnhancedAnomalyLabel {
1512    /// Creates an enhanced label from a base labeled anomaly.
1513    pub fn from_base(base: LabeledAnomaly) -> Self {
1514        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1515        let enhanced_confidence = base.confidence;
1516        let enhanced_severity = base.severity as f64 / 5.0;
1517
1518        Self {
1519            base,
1520            category,
1521            enhanced_confidence,
1522            enhanced_severity,
1523            contributing_factors: Vec::new(),
1524            secondary_categories: Vec::new(),
1525        }
1526    }
1527
1528    /// Sets the enhanced confidence.
1529    pub fn with_confidence(mut self, confidence: f64) -> Self {
1530        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1531        self
1532    }
1533
1534    /// Sets the enhanced severity.
1535    pub fn with_severity(mut self, severity: f64) -> Self {
1536        self.enhanced_severity = severity.clamp(0.0, 1.0);
1537        self
1538    }
1539
1540    /// Adds a contributing factor.
1541    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1542        self.contributing_factors.push(factor);
1543        self
1544    }
1545
1546    /// Adds a secondary category.
1547    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1548        if !self.secondary_categories.contains(&category) && category != self.category {
1549            self.secondary_categories.push(category);
1550        }
1551        self
1552    }
1553
1554    /// Converts to an extended feature vector.
1555    ///
1556    /// Returns base features (15) + enhanced features (10) = 25 features.
1557    pub fn to_features(&self) -> Vec<f64> {
1558        let mut features = self.base.to_features();
1559
1560        // Enhanced features
1561        features.push(self.enhanced_confidence);
1562        features.push(self.enhanced_severity);
1563        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1564        features.push(self.secondary_categories.len() as f64);
1565        features.push(self.contributing_factors.len() as f64);
1566
1567        // Max factor weight
1568        let max_weight = self
1569            .contributing_factors
1570            .iter()
1571            .map(|f| f.weight)
1572            .fold(0.0, f64::max);
1573        features.push(max_weight);
1574
1575        // Factor type indicators (binary flags for key factor types)
1576        let has_control_bypass = self
1577            .contributing_factors
1578            .iter()
1579            .any(|f| f.factor_type == FactorType::ControlBypass);
1580        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1581
1582        let has_amount_deviation = self
1583            .contributing_factors
1584            .iter()
1585            .any(|f| f.factor_type == FactorType::AmountDeviation);
1586        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1587
1588        let has_timing = self
1589            .contributing_factors
1590            .iter()
1591            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1592        features.push(if has_timing { 1.0 } else { 0.0 });
1593
1594        let has_pattern_match = self
1595            .contributing_factors
1596            .iter()
1597            .any(|f| f.factor_type == FactorType::PatternMatch);
1598        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1599
1600        features
1601    }
1602
1603    /// Returns the number of features in the enhanced feature vector.
1604    pub fn feature_count() -> usize {
1605        25 // 15 base + 10 enhanced
1606    }
1607
1608    /// Returns feature names for the enhanced feature vector.
1609    pub fn feature_names() -> Vec<&'static str> {
1610        let mut names = LabeledAnomaly::feature_names();
1611        names.extend(vec![
1612            "enhanced_confidence",
1613            "enhanced_severity",
1614            "category_ordinal",
1615            "secondary_category_count",
1616            "contributing_factor_count",
1617            "max_factor_weight",
1618            "has_control_bypass",
1619            "has_amount_deviation",
1620            "has_timing_factor",
1621            "has_pattern_match",
1622        ]);
1623        names
1624    }
1625}
1626
1627// ============================================================================
1628// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1629// ============================================================================
1630
/// Severity level classification for anomalies.
///
/// Four ordered buckets from `Low` to `Critical`; `Medium` is the `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SeverityLevel {
    /// Minor issue, low impact.
    Low,
    /// Moderate issue, noticeable impact.
    #[default]
    Medium,
    /// Significant issue, substantial impact.
    High,
    /// Critical issue, severe impact requiring immediate attention.
    Critical,
}
1644
1645impl SeverityLevel {
1646    /// Returns the numeric value (1-4) for the severity level.
1647    pub fn numeric(&self) -> u8 {
1648        match self {
1649            SeverityLevel::Low => 1,
1650            SeverityLevel::Medium => 2,
1651            SeverityLevel::High => 3,
1652            SeverityLevel::Critical => 4,
1653        }
1654    }
1655
1656    /// Creates a severity level from a numeric value.
1657    pub fn from_numeric(value: u8) -> Self {
1658        match value {
1659            1 => SeverityLevel::Low,
1660            2 => SeverityLevel::Medium,
1661            3 => SeverityLevel::High,
1662            _ => SeverityLevel::Critical,
1663        }
1664    }
1665
1666    /// Creates a severity level from a normalized score (0.0-1.0).
1667    pub fn from_score(score: f64) -> Self {
1668        match score {
1669            s if s < 0.25 => SeverityLevel::Low,
1670            s if s < 0.50 => SeverityLevel::Medium,
1671            s if s < 0.75 => SeverityLevel::High,
1672            _ => SeverityLevel::Critical,
1673        }
1674    }
1675
1676    /// Returns a normalized score (0.0-1.0) for this severity level.
1677    pub fn to_score(&self) -> f64 {
1678        match self {
1679            SeverityLevel::Low => 0.125,
1680            SeverityLevel::Medium => 0.375,
1681            SeverityLevel::High => 0.625,
1682            SeverityLevel::Critical => 0.875,
1683        }
1684    }
1685}
1686
/// Structured severity scoring for anomalies.
///
/// Combines a discrete level, a continuous score and the financial impact,
/// plus an optional materiality assessment filled in by `with_materiality`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalySeverity {
    /// Severity level classification.
    pub level: SeverityLevel,
    /// Continuous severity score (0.0-1.0).
    pub score: f64,
    /// Financial impact amount; materiality is judged on its absolute value.
    pub financial_impact: Decimal,
    /// Whether the absolute financial impact meets or exceeds the materiality threshold.
    pub is_material: bool,
    /// Materiality threshold used for determination.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub materiality_threshold: Option<Decimal>,
}
1702
1703impl AnomalySeverity {
1704    /// Creates a new severity assessment.
1705    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1706        Self {
1707            level,
1708            score: level.to_score(),
1709            financial_impact,
1710            is_material: false,
1711            materiality_threshold: None,
1712        }
1713    }
1714
1715    /// Creates severity from a score, auto-determining level.
1716    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1717        Self {
1718            level: SeverityLevel::from_score(score),
1719            score: score.clamp(0.0, 1.0),
1720            financial_impact,
1721            is_material: false,
1722            materiality_threshold: None,
1723        }
1724    }
1725
1726    /// Sets the materiality assessment.
1727    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1728        self.materiality_threshold = Some(threshold);
1729        self.is_material = self.financial_impact.abs() >= threshold;
1730        self
1731    }
1732}
1733
1734impl Default for AnomalySeverity {
1735    fn default() -> Self {
1736        Self {
1737            level: SeverityLevel::Medium,
1738            score: 0.5,
1739            financial_impact: Decimal::ZERO,
1740            is_material: false,
1741            materiality_threshold: None,
1742        }
1743    }
1744}
1745
1746/// Detection difficulty classification for anomalies.
1747///
1748/// Categorizes how difficult an anomaly is to detect, which is useful
1749/// for ML model benchmarking and audit procedure selection.
1750///
1751/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
1752/// is used for drift event classification and has different variants.
1753#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1754pub enum AnomalyDetectionDifficulty {
1755    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
1756    Trivial,
1757    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
1758    Easy,
1759    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
1760    #[default]
1761    Moderate,
1762    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
1763    Hard,
1764    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
1765    Expert,
1766}
1767
1768impl AnomalyDetectionDifficulty {
1769    /// Returns the expected detection rate for this difficulty level.
1770    pub fn expected_detection_rate(&self) -> f64 {
1771        match self {
1772            AnomalyDetectionDifficulty::Trivial => 0.99,
1773            AnomalyDetectionDifficulty::Easy => 0.90,
1774            AnomalyDetectionDifficulty::Moderate => 0.70,
1775            AnomalyDetectionDifficulty::Hard => 0.40,
1776            AnomalyDetectionDifficulty::Expert => 0.15,
1777        }
1778    }
1779
1780    /// Returns a numeric difficulty score (0.0-1.0).
1781    pub fn difficulty_score(&self) -> f64 {
1782        match self {
1783            AnomalyDetectionDifficulty::Trivial => 0.05,
1784            AnomalyDetectionDifficulty::Easy => 0.25,
1785            AnomalyDetectionDifficulty::Moderate => 0.50,
1786            AnomalyDetectionDifficulty::Hard => 0.75,
1787            AnomalyDetectionDifficulty::Expert => 0.95,
1788        }
1789    }
1790
1791    /// Creates a difficulty level from a score (0.0-1.0).
1792    pub fn from_score(score: f64) -> Self {
1793        match score {
1794            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1795            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1796            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1797            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1798            _ => AnomalyDetectionDifficulty::Expert,
1799        }
1800    }
1801
1802    /// Returns the name of this difficulty level.
1803    pub fn name(&self) -> &'static str {
1804        match self {
1805            AnomalyDetectionDifficulty::Trivial => "trivial",
1806            AnomalyDetectionDifficulty::Easy => "easy",
1807            AnomalyDetectionDifficulty::Moderate => "moderate",
1808            AnomalyDetectionDifficulty::Hard => "hard",
1809            AnomalyDetectionDifficulty::Expert => "expert",
1810        }
1811    }
1812}
1813
1814/// Ground truth certainty level for anomaly labels.
1815///
1816/// Indicates how certain we are that the label is correct.
1817#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1818pub enum GroundTruthCertainty {
1819    /// Definitively known (injected anomaly with full provenance).
1820    #[default]
1821    Definite,
1822    /// Highly probable based on strong evidence.
1823    Probable,
1824    /// Possibly an anomaly based on indirect evidence.
1825    Possible,
1826}
1827
1828impl GroundTruthCertainty {
1829    /// Returns a certainty score (0.0-1.0).
1830    pub fn certainty_score(&self) -> f64 {
1831        match self {
1832            GroundTruthCertainty::Definite => 1.0,
1833            GroundTruthCertainty::Probable => 0.8,
1834            GroundTruthCertainty::Possible => 0.5,
1835        }
1836    }
1837
1838    /// Returns the name of this certainty level.
1839    pub fn name(&self) -> &'static str {
1840        match self {
1841            GroundTruthCertainty::Definite => "definite",
1842            GroundTruthCertainty::Probable => "probable",
1843            GroundTruthCertainty::Possible => "possible",
1844        }
1845    }
1846}
1847
1848/// Detection method classification.
1849///
1850/// Indicates which detection methods are recommended or effective for an anomaly.
1851#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1852pub enum DetectionMethod {
1853    /// Simple rule-based detection (thresholds, filters).
1854    RuleBased,
1855    /// Statistical analysis (distributions, outlier detection).
1856    Statistical,
1857    /// Machine learning models (classification, anomaly detection).
1858    MachineLearning,
1859    /// Graph-based analysis (network patterns, relationships).
1860    GraphBased,
1861    /// Manual forensic audit procedures.
1862    ForensicAudit,
1863    /// Combination of multiple methods.
1864    Hybrid,
1865}
1866
1867impl DetectionMethod {
1868    /// Returns the name of this detection method.
1869    pub fn name(&self) -> &'static str {
1870        match self {
1871            DetectionMethod::RuleBased => "rule_based",
1872            DetectionMethod::Statistical => "statistical",
1873            DetectionMethod::MachineLearning => "machine_learning",
1874            DetectionMethod::GraphBased => "graph_based",
1875            DetectionMethod::ForensicAudit => "forensic_audit",
1876            DetectionMethod::Hybrid => "hybrid",
1877        }
1878    }
1879
1880    /// Returns a description of this detection method.
1881    pub fn description(&self) -> &'static str {
1882        match self {
1883            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1884            DetectionMethod::Statistical => "Statistical distribution analysis",
1885            DetectionMethod::MachineLearning => "ML classification models",
1886            DetectionMethod::GraphBased => "Network and relationship analysis",
1887            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1888            DetectionMethod::Hybrid => "Combined multi-method approach",
1889        }
1890    }
1891}
1892
1893/// Extended anomaly label with comprehensive multi-dimensional classification.
1894///
1895/// This extends the base `EnhancedAnomalyLabel` with additional fields for
1896/// severity scoring, detection difficulty, recommended methods, and ground truth.
1897#[derive(Debug, Clone, Serialize, Deserialize)]
1898pub struct ExtendedAnomalyLabel {
1899    /// Base labeled anomaly.
1900    pub base: LabeledAnomaly,
1901    /// Enhanced category classification.
1902    pub category: AnomalyCategory,
1903    /// Structured severity assessment.
1904    pub severity: AnomalySeverity,
1905    /// Detection difficulty classification.
1906    pub detection_difficulty: AnomalyDetectionDifficulty,
1907    /// Recommended detection methods for this anomaly.
1908    pub recommended_methods: Vec<DetectionMethod>,
1909    /// Key indicators that should trigger detection.
1910    pub key_indicators: Vec<String>,
1911    /// Ground truth certainty level.
1912    pub ground_truth_certainty: GroundTruthCertainty,
1913    /// Contributing factors to confidence/severity.
1914    pub contributing_factors: Vec<ContributingFactor>,
1915    /// Related entity IDs (vendors, customers, employees, etc.).
1916    pub related_entity_ids: Vec<String>,
1917    /// Secondary categories for multi-label classification.
1918    pub secondary_categories: Vec<AnomalyCategory>,
1919    /// Scheme ID if part of a multi-stage fraud scheme.
1920    #[serde(default, skip_serializing_if = "Option::is_none")]
1921    pub scheme_id: Option<String>,
1922    /// Stage number within a scheme (1-indexed).
1923    #[serde(default, skip_serializing_if = "Option::is_none")]
1924    pub scheme_stage: Option<u32>,
1925    /// Whether this is a near-miss (suspicious but legitimate).
1926    #[serde(default)]
1927    pub is_near_miss: bool,
1928    /// Explanation if this is a near-miss.
1929    #[serde(default, skip_serializing_if = "Option::is_none")]
1930    pub near_miss_explanation: Option<String>,
1931}
1932
1933impl ExtendedAnomalyLabel {
1934    /// Creates an extended label from a base labeled anomaly.
1935    pub fn from_base(base: LabeledAnomaly) -> Self {
1936        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1937        let severity = AnomalySeverity {
1938            level: SeverityLevel::from_numeric(base.severity),
1939            score: base.severity as f64 / 5.0,
1940            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1941            is_material: false,
1942            materiality_threshold: None,
1943        };
1944
1945        Self {
1946            base,
1947            category,
1948            severity,
1949            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1950            recommended_methods: vec![DetectionMethod::RuleBased],
1951            key_indicators: Vec::new(),
1952            ground_truth_certainty: GroundTruthCertainty::Definite,
1953            contributing_factors: Vec::new(),
1954            related_entity_ids: Vec::new(),
1955            secondary_categories: Vec::new(),
1956            scheme_id: None,
1957            scheme_stage: None,
1958            is_near_miss: false,
1959            near_miss_explanation: None,
1960        }
1961    }
1962
1963    /// Sets the severity assessment.
1964    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1965        self.severity = severity;
1966        self
1967    }
1968
1969    /// Sets the detection difficulty.
1970    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1971        self.detection_difficulty = difficulty;
1972        self
1973    }
1974
1975    /// Adds a recommended detection method.
1976    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1977        if !self.recommended_methods.contains(&method) {
1978            self.recommended_methods.push(method);
1979        }
1980        self
1981    }
1982
1983    /// Sets the recommended detection methods.
1984    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1985        self.recommended_methods = methods;
1986        self
1987    }
1988
1989    /// Adds a key indicator.
1990    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1991        self.key_indicators.push(indicator.into());
1992        self
1993    }
1994
1995    /// Sets the ground truth certainty.
1996    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1997        self.ground_truth_certainty = certainty;
1998        self
1999    }
2000
2001    /// Adds a contributing factor.
2002    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
2003        self.contributing_factors.push(factor);
2004        self
2005    }
2006
2007    /// Adds a related entity ID.
2008    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
2009        self.related_entity_ids.push(entity_id.into());
2010        self
2011    }
2012
2013    /// Adds a secondary category.
2014    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2015        if category != self.category && !self.secondary_categories.contains(&category) {
2016            self.secondary_categories.push(category);
2017        }
2018        self
2019    }
2020
2021    /// Sets scheme information.
2022    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2023        self.scheme_id = Some(scheme_id.into());
2024        self.scheme_stage = Some(stage);
2025        self
2026    }
2027
2028    /// Marks this as a near-miss with explanation.
2029    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2030        self.is_near_miss = true;
2031        self.near_miss_explanation = Some(explanation.into());
2032        self
2033    }
2034
2035    /// Converts to an extended feature vector for ML.
2036    ///
2037    /// Returns base features (15) + extended features (15) = 30 features.
2038    pub fn to_features(&self) -> Vec<f64> {
2039        let mut features = self.base.to_features();
2040
2041        // Extended features
2042        features.push(self.severity.score);
2043        features.push(self.severity.level.to_score());
2044        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2045        features.push(self.detection_difficulty.difficulty_score());
2046        features.push(self.detection_difficulty.expected_detection_rate());
2047        features.push(self.ground_truth_certainty.certainty_score());
2048        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2049        features.push(self.secondary_categories.len() as f64);
2050        features.push(self.contributing_factors.len() as f64);
2051        features.push(self.key_indicators.len() as f64);
2052        features.push(self.recommended_methods.len() as f64);
2053        features.push(self.related_entity_ids.len() as f64);
2054        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2055        features.push(self.scheme_stage.unwrap_or(0) as f64);
2056        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2057
2058        features
2059    }
2060
2061    /// Returns the number of features in the extended feature vector.
2062    pub fn feature_count() -> usize {
2063        30 // 15 base + 15 extended
2064    }
2065
2066    /// Returns feature names for the extended feature vector.
2067    pub fn feature_names() -> Vec<&'static str> {
2068        let mut names = LabeledAnomaly::feature_names();
2069        names.extend(vec![
2070            "severity_score",
2071            "severity_level_score",
2072            "is_material",
2073            "difficulty_score",
2074            "expected_detection_rate",
2075            "ground_truth_certainty",
2076            "category_ordinal",
2077            "secondary_category_count",
2078            "contributing_factor_count",
2079            "key_indicator_count",
2080            "recommended_method_count",
2081            "related_entity_count",
2082            "is_part_of_scheme",
2083            "scheme_stage",
2084            "is_near_miss",
2085        ]);
2086        names
2087    }
2088}
2089
2090// ============================================================================
2091// MULTI-STAGE FRAUD SCHEME TYPES
2092// ============================================================================
2093
2094/// Type of multi-stage fraud scheme.
2095#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2096pub enum SchemeType {
2097    /// Gradual embezzlement over time.
2098    GradualEmbezzlement,
2099    /// Revenue manipulation across periods.
2100    RevenueManipulation,
2101    /// Vendor kickback scheme.
2102    VendorKickback,
2103    /// Round-tripping funds through multiple entities.
2104    RoundTripping,
2105    /// Ghost employee scheme.
2106    GhostEmployee,
2107    /// Expense reimbursement fraud.
2108    ExpenseReimbursement,
2109    /// Inventory theft scheme.
2110    InventoryTheft,
2111    /// Custom scheme type.
2112    Custom,
2113}
2114
2115impl SchemeType {
2116    /// Returns the name of this scheme type.
2117    pub fn name(&self) -> &'static str {
2118        match self {
2119            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2120            SchemeType::RevenueManipulation => "revenue_manipulation",
2121            SchemeType::VendorKickback => "vendor_kickback",
2122            SchemeType::RoundTripping => "round_tripping",
2123            SchemeType::GhostEmployee => "ghost_employee",
2124            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2125            SchemeType::InventoryTheft => "inventory_theft",
2126            SchemeType::Custom => "custom",
2127        }
2128    }
2129
2130    /// Returns the typical number of stages for this scheme type.
2131    pub fn typical_stages(&self) -> u32 {
2132        match self {
2133            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2134            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2135            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2136            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2137            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2138            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2139            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2140            SchemeType::Custom => 4,
2141        }
2142    }
2143}
2144
2145/// Status of detection for a fraud scheme.
2146#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2147pub enum SchemeDetectionStatus {
2148    /// Scheme is undetected.
2149    #[default]
2150    Undetected,
2151    /// Under investigation but not confirmed.
2152    UnderInvestigation,
2153    /// Partially detected (some transactions flagged).
2154    PartiallyDetected,
2155    /// Fully detected and confirmed.
2156    FullyDetected,
2157}
2158
2159/// Reference to a transaction within a scheme.
2160#[derive(Debug, Clone, Serialize, Deserialize)]
2161pub struct SchemeTransactionRef {
2162    /// Document ID of the transaction.
2163    pub document_id: String,
2164    /// Transaction date.
2165    pub date: chrono::NaiveDate,
2166    /// Transaction amount.
2167    pub amount: Decimal,
2168    /// Stage this transaction belongs to.
2169    pub stage: u32,
2170    /// Anomaly ID if labeled.
2171    #[serde(default, skip_serializing_if = "Option::is_none")]
2172    pub anomaly_id: Option<String>,
2173}
2174
2175/// Concealment technique used in fraud.
2176#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2177pub enum ConcealmentTechnique {
2178    /// Document manipulation or forgery.
2179    DocumentManipulation,
2180    /// Circumventing approval processes.
2181    ApprovalCircumvention,
2182    /// Exploiting timing (period-end, holidays).
2183    TimingExploitation,
2184    /// Transaction splitting to avoid thresholds.
2185    TransactionSplitting,
2186    /// Account misclassification.
2187    AccountMisclassification,
2188    /// Collusion with other employees.
2189    Collusion,
2190    /// Data alteration or deletion.
2191    DataAlteration,
2192    /// Creating false documentation.
2193    FalseDocumentation,
2194}
2195
2196impl ConcealmentTechnique {
2197    /// Returns the difficulty bonus this technique adds.
2198    pub fn difficulty_bonus(&self) -> f64 {
2199        match self {
2200            ConcealmentTechnique::DocumentManipulation => 0.20,
2201            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2202            ConcealmentTechnique::TimingExploitation => 0.10,
2203            ConcealmentTechnique::TransactionSplitting => 0.15,
2204            ConcealmentTechnique::AccountMisclassification => 0.10,
2205            ConcealmentTechnique::Collusion => 0.25,
2206            ConcealmentTechnique::DataAlteration => 0.20,
2207            ConcealmentTechnique::FalseDocumentation => 0.15,
2208        }
2209    }
2210}
2211
2212// ============================================================================
2213// ACFE-ALIGNED FRAUD TAXONOMY
2214// ============================================================================
2215//
2216// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2217// Nations: Occupational Fraud Classification System. This taxonomy provides
2218// ACFE-aligned categories, schemes, and calibration data.
2219
2220/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
2221///
2222/// ACFE Report to the Nations statistics (typical):
2223/// - Asset Misappropriation: 86% of cases, $100k median loss
2224/// - Corruption: 33% of cases, $150k median loss
2225/// - Financial Statement Fraud: 10% of cases, $954k median loss
2226///
2227/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
2228#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2229pub enum AcfeFraudCategory {
2230    /// Theft of organizational assets (cash, inventory, equipment).
2231    /// Most common (86% of cases) but typically lowest median loss ($100k).
2232    #[default]
2233    AssetMisappropriation,
2234    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
2235    /// Medium frequency (33% of cases), medium median loss ($150k).
2236    Corruption,
2237    /// Intentional misstatement of financial statements.
2238    /// Least common (10% of cases) but highest median loss ($954k).
2239    FinancialStatementFraud,
2240}
2241
2242impl AcfeFraudCategory {
2243    /// Returns the name of this category.
2244    pub fn name(&self) -> &'static str {
2245        match self {
2246            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2247            AcfeFraudCategory::Corruption => "corruption",
2248            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2249        }
2250    }
2251
2252    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2253    pub fn typical_occurrence_rate(&self) -> f64 {
2254        match self {
2255            AcfeFraudCategory::AssetMisappropriation => 0.86,
2256            AcfeFraudCategory::Corruption => 0.33,
2257            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2258        }
2259    }
2260
2261    /// Returns the typical median loss amount (from ACFE reports).
2262    pub fn typical_median_loss(&self) -> Decimal {
2263        match self {
2264            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2265            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2266            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2267        }
2268    }
2269
2270    /// Returns the typical detection time in months (from ACFE reports).
2271    pub fn typical_detection_months(&self) -> u32 {
2272        match self {
2273            AcfeFraudCategory::AssetMisappropriation => 12,
2274            AcfeFraudCategory::Corruption => 18,
2275            AcfeFraudCategory::FinancialStatementFraud => 24,
2276        }
2277    }
2278}
2279
2280/// Cash-based fraud schemes under Asset Misappropriation.
2281///
2282/// Organized according to the ACFE Fraud Tree:
2283/// - Theft of Cash on Hand
2284/// - Theft of Cash Receipts
2285/// - Fraudulent Disbursements
2286#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2287pub enum CashFraudScheme {
2288    // ========== Theft of Cash on Hand ==========
2289    /// Stealing cash from cash drawers or safes after it has been recorded.
2290    Larceny,
2291    /// Stealing cash before it is recorded in the books (intercepts receipts).
2292    Skimming,
2293
2294    // ========== Theft of Cash Receipts ==========
2295    /// Skimming from sales transactions before recording.
2296    SalesSkimming,
2297    /// Intercepting customer payments on accounts receivable.
2298    ReceivablesSkimming,
2299    /// Creating false refunds to pocket the difference.
2300    RefundSchemes,
2301
2302    // ========== Fraudulent Disbursements - Billing Schemes ==========
2303    /// Creating fictitious vendors to invoice and pay.
2304    ShellCompany,
2305    /// Manipulating payments to legitimate vendors for personal gain.
2306    NonAccompliceVendor,
2307    /// Using company funds for personal purchases.
2308    PersonalPurchases,
2309
2310    // ========== Fraudulent Disbursements - Payroll Schemes ==========
2311    /// Creating fake employees to collect wages.
2312    GhostEmployee,
2313    /// Falsifying hours worked, sales commissions, or salary rates.
2314    FalsifiedWages,
2315    /// Manipulating commission calculations.
2316    CommissionSchemes,
2317
2318    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
2319    /// Claiming non-business expenses as business expenses.
2320    MischaracterizedExpenses,
2321    /// Inflating legitimate expense amounts.
2322    OverstatedExpenses,
2323    /// Creating completely fictitious expenses.
2324    FictitiousExpenses,
2325
2326    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
2327    /// Forging the signature of an authorized check signer.
2328    ForgedMaker,
2329    /// Intercepting and altering the endorsement on legitimate checks.
2330    ForgedEndorsement,
2331    /// Altering the payee on a legitimate check.
2332    AlteredPayee,
2333    /// Authorized signer writing checks for personal benefit.
2334    AuthorizedMaker,
2335
2336    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
2337    /// Creating false voided transactions.
2338    FalseVoids,
2339    /// Processing fictitious refunds.
2340    FalseRefunds,
2341}
2342
2343impl CashFraudScheme {
2344    /// Returns the ACFE category this scheme belongs to.
2345    pub fn category(&self) -> AcfeFraudCategory {
2346        AcfeFraudCategory::AssetMisappropriation
2347    }
2348
2349    /// Returns the subcategory within the ACFE Fraud Tree.
2350    pub fn subcategory(&self) -> &'static str {
2351        match self {
2352            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2353            CashFraudScheme::SalesSkimming
2354            | CashFraudScheme::ReceivablesSkimming
2355            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2356            CashFraudScheme::ShellCompany
2357            | CashFraudScheme::NonAccompliceVendor
2358            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2359            CashFraudScheme::GhostEmployee
2360            | CashFraudScheme::FalsifiedWages
2361            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2362            CashFraudScheme::MischaracterizedExpenses
2363            | CashFraudScheme::OverstatedExpenses
2364            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2365            CashFraudScheme::ForgedMaker
2366            | CashFraudScheme::ForgedEndorsement
2367            | CashFraudScheme::AlteredPayee
2368            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2369            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2370        }
2371    }
2372
2373    /// Returns the typical severity (1-5) for this scheme.
2374    pub fn severity(&self) -> u8 {
2375        match self {
2376            // Lower severity - often small amounts, easier to detect
2377            CashFraudScheme::FalseVoids
2378            | CashFraudScheme::FalseRefunds
2379            | CashFraudScheme::MischaracterizedExpenses => 3,
2380            // Medium severity
2381            CashFraudScheme::OverstatedExpenses
2382            | CashFraudScheme::Skimming
2383            | CashFraudScheme::Larceny
2384            | CashFraudScheme::PersonalPurchases
2385            | CashFraudScheme::FalsifiedWages => 4,
2386            // Higher severity - larger amounts, harder to detect
2387            CashFraudScheme::ShellCompany
2388            | CashFraudScheme::GhostEmployee
2389            | CashFraudScheme::FictitiousExpenses
2390            | CashFraudScheme::ForgedMaker
2391            | CashFraudScheme::AuthorizedMaker => 5,
2392            _ => 4,
2393        }
2394    }
2395
2396    /// Returns the typical detection difficulty.
2397    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2398        match self {
2399            // Easy to detect with basic controls
2400            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2401                AnomalyDetectionDifficulty::Easy
2402            }
2403            // Moderate - requires reconciliation
2404            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2405                AnomalyDetectionDifficulty::Moderate
2406            }
2407            // Hard - requires sophisticated analysis
2408            CashFraudScheme::Skimming
2409            | CashFraudScheme::ShellCompany
2410            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2411            // Expert level
2412            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2413                AnomalyDetectionDifficulty::Expert
2414            }
2415            _ => AnomalyDetectionDifficulty::Moderate,
2416        }
2417    }
2418
2419    /// Returns all variants for iteration.
2420    pub fn all_variants() -> &'static [CashFraudScheme] {
2421        &[
2422            CashFraudScheme::Larceny,
2423            CashFraudScheme::Skimming,
2424            CashFraudScheme::SalesSkimming,
2425            CashFraudScheme::ReceivablesSkimming,
2426            CashFraudScheme::RefundSchemes,
2427            CashFraudScheme::ShellCompany,
2428            CashFraudScheme::NonAccompliceVendor,
2429            CashFraudScheme::PersonalPurchases,
2430            CashFraudScheme::GhostEmployee,
2431            CashFraudScheme::FalsifiedWages,
2432            CashFraudScheme::CommissionSchemes,
2433            CashFraudScheme::MischaracterizedExpenses,
2434            CashFraudScheme::OverstatedExpenses,
2435            CashFraudScheme::FictitiousExpenses,
2436            CashFraudScheme::ForgedMaker,
2437            CashFraudScheme::ForgedEndorsement,
2438            CashFraudScheme::AlteredPayee,
2439            CashFraudScheme::AuthorizedMaker,
2440            CashFraudScheme::FalseVoids,
2441            CashFraudScheme::FalseRefunds,
2442        ]
2443    }
2444}
2445
2446/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
2447#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2448pub enum AssetFraudScheme {
2449    // ========== Inventory Schemes ==========
2450    /// Misusing or converting inventory for personal benefit.
2451    InventoryMisuse,
2452    /// Stealing physical inventory items.
2453    InventoryTheft,
2454    /// Manipulating purchasing to facilitate theft.
2455    InventoryPurchasingScheme,
2456    /// Manipulating receiving/shipping to steal inventory.
2457    InventoryReceivingScheme,
2458
2459    // ========== Other Asset Schemes ==========
2460    /// Misusing company equipment or vehicles.
2461    EquipmentMisuse,
2462    /// Theft of company equipment, tools, or supplies.
2463    EquipmentTheft,
2464    /// Unauthorized access to or theft of intellectual property.
2465    IntellectualPropertyTheft,
2466    /// Using company time/resources for personal business.
2467    TimeTheft,
2468}
2469
2470impl AssetFraudScheme {
2471    /// Returns the ACFE category this scheme belongs to.
2472    pub fn category(&self) -> AcfeFraudCategory {
2473        AcfeFraudCategory::AssetMisappropriation
2474    }
2475
2476    /// Returns the subcategory within the ACFE Fraud Tree.
2477    pub fn subcategory(&self) -> &'static str {
2478        match self {
2479            AssetFraudScheme::InventoryMisuse
2480            | AssetFraudScheme::InventoryTheft
2481            | AssetFraudScheme::InventoryPurchasingScheme
2482            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2483            _ => "other_assets",
2484        }
2485    }
2486
2487    /// Returns the typical severity (1-5) for this scheme.
2488    pub fn severity(&self) -> u8 {
2489        match self {
2490            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2491            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2492            AssetFraudScheme::InventoryTheft
2493            | AssetFraudScheme::InventoryPurchasingScheme
2494            | AssetFraudScheme::InventoryReceivingScheme => 4,
2495            AssetFraudScheme::IntellectualPropertyTheft => 5,
2496        }
2497    }
2498}
2499
2500/// Corruption schemes under the ACFE Fraud Tree.
2501///
2502/// Corruption schemes involve the wrongful use of influence in a business
2503/// transaction to procure personal benefit.
2504#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2505pub enum CorruptionScheme {
2506    // ========== Conflicts of Interest ==========
2507    /// Employee has undisclosed financial interest in purchasing decisions.
2508    PurchasingConflict,
2509    /// Employee has undisclosed relationship with customer/vendor.
2510    SalesConflict,
2511    /// Employee owns or has interest in competing business.
2512    OutsideBusinessInterest,
2513    /// Employee makes decisions benefiting family members.
2514    NepotismConflict,
2515
2516    // ========== Bribery ==========
2517    /// Kickback payments from vendors for favorable treatment.
2518    InvoiceKickback,
2519    /// Collusion among vendors to inflate prices.
2520    BidRigging,
2521    /// Other cash payments for favorable decisions.
2522    CashBribery,
2523    /// Bribery of government officials.
2524    PublicOfficial,
2525
2526    // ========== Illegal Gratuities ==========
2527    /// Gifts given after favorable decisions (not agreed in advance).
2528    IllegalGratuity,
2529
2530    // ========== Economic Extortion ==========
2531    /// Demanding payment under threat of adverse action.
2532    EconomicExtortion,
2533}
2534
2535impl CorruptionScheme {
2536    /// Returns the ACFE category this scheme belongs to.
2537    pub fn category(&self) -> AcfeFraudCategory {
2538        AcfeFraudCategory::Corruption
2539    }
2540
2541    /// Returns the subcategory within the ACFE Fraud Tree.
2542    pub fn subcategory(&self) -> &'static str {
2543        match self {
2544            CorruptionScheme::PurchasingConflict
2545            | CorruptionScheme::SalesConflict
2546            | CorruptionScheme::OutsideBusinessInterest
2547            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2548            CorruptionScheme::InvoiceKickback
2549            | CorruptionScheme::BidRigging
2550            | CorruptionScheme::CashBribery
2551            | CorruptionScheme::PublicOfficial => "bribery",
2552            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2553            CorruptionScheme::EconomicExtortion => "economic_extortion",
2554        }
2555    }
2556
2557    /// Returns the typical severity (1-5) for this scheme.
2558    pub fn severity(&self) -> u8 {
2559        match self {
2560            // Lower severity conflicts of interest
2561            CorruptionScheme::NepotismConflict => 3,
2562            // Medium severity
2563            CorruptionScheme::PurchasingConflict
2564            | CorruptionScheme::SalesConflict
2565            | CorruptionScheme::OutsideBusinessInterest
2566            | CorruptionScheme::IllegalGratuity => 4,
2567            // High severity - active corruption
2568            CorruptionScheme::InvoiceKickback
2569            | CorruptionScheme::BidRigging
2570            | CorruptionScheme::CashBribery
2571            | CorruptionScheme::EconomicExtortion => 5,
2572            // Highest severity - involves public officials
2573            CorruptionScheme::PublicOfficial => 5,
2574        }
2575    }
2576
2577    /// Returns the typical detection difficulty.
2578    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2579        match self {
2580            // Easier to detect with proper disclosure requirements
2581            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2582                AnomalyDetectionDifficulty::Moderate
2583            }
2584            // Hard - requires transaction pattern analysis
2585            CorruptionScheme::PurchasingConflict
2586            | CorruptionScheme::SalesConflict
2587            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2588            // Expert level - deliberate concealment
2589            CorruptionScheme::InvoiceKickback
2590            | CorruptionScheme::CashBribery
2591            | CorruptionScheme::PublicOfficial
2592            | CorruptionScheme::IllegalGratuity
2593            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2594        }
2595    }
2596
2597    /// Returns all variants for iteration.
2598    pub fn all_variants() -> &'static [CorruptionScheme] {
2599        &[
2600            CorruptionScheme::PurchasingConflict,
2601            CorruptionScheme::SalesConflict,
2602            CorruptionScheme::OutsideBusinessInterest,
2603            CorruptionScheme::NepotismConflict,
2604            CorruptionScheme::InvoiceKickback,
2605            CorruptionScheme::BidRigging,
2606            CorruptionScheme::CashBribery,
2607            CorruptionScheme::PublicOfficial,
2608            CorruptionScheme::IllegalGratuity,
2609            CorruptionScheme::EconomicExtortion,
2610        ]
2611    }
2612}
2613
2614/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
2615///
2616/// Financial statement fraud involves the intentional misstatement or omission
2617/// of material information in financial reports.
2618#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2619pub enum FinancialStatementScheme {
2620    // ========== Asset/Revenue Overstatement ==========
2621    /// Recording revenue before it is earned.
2622    PrematureRevenue,
2623    /// Deferring expenses to future periods.
2624    DelayedExpenses,
2625    /// Recording revenue for transactions that never occurred.
2626    FictitiousRevenues,
2627    /// Failing to record known liabilities.
2628    ConcealedLiabilities,
2629    /// Overstating the value of assets.
2630    ImproperAssetValuations,
2631    /// Omitting or misstating required disclosures.
2632    ImproperDisclosures,
2633    /// Manipulating timing of revenue recognition (channel stuffing).
2634    ChannelStuffing,
2635    /// Recognizing bill-and-hold revenue improperly.
2636    BillAndHold,
2637    /// Capitalizing expenses that should be expensed.
2638    ImproperCapitalization,
2639
2640    // ========== Asset/Revenue Understatement ==========
2641    /// Understating revenue (often for tax purposes).
2642    UnderstatedRevenues,
2643    /// Recording excessive expenses.
2644    OverstatedExpenses,
2645    /// Recording excessive liabilities or reserves.
2646    OverstatedLiabilities,
2647    /// Undervaluing assets for writedowns/reserves.
2648    ImproperAssetWritedowns,
2649}
2650
2651impl FinancialStatementScheme {
2652    /// Returns the ACFE category this scheme belongs to.
2653    pub fn category(&self) -> AcfeFraudCategory {
2654        AcfeFraudCategory::FinancialStatementFraud
2655    }
2656
2657    /// Returns the subcategory within the ACFE Fraud Tree.
2658    pub fn subcategory(&self) -> &'static str {
2659        match self {
2660            FinancialStatementScheme::UnderstatedRevenues
2661            | FinancialStatementScheme::OverstatedExpenses
2662            | FinancialStatementScheme::OverstatedLiabilities
2663            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2664            _ => "overstatement",
2665        }
2666    }
2667
2668    /// Returns the typical severity (1-5) for this scheme.
2669    pub fn severity(&self) -> u8 {
2670        // All financial statement fraud is high severity
2671        5
2672    }
2673
2674    /// Returns the typical detection difficulty.
2675    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2676        match self {
2677            // Easier to detect with good analytics
2678            FinancialStatementScheme::ChannelStuffing
2679            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2680            // Hard - requires deep analysis
2681            FinancialStatementScheme::PrematureRevenue
2682            | FinancialStatementScheme::ImproperCapitalization
2683            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2684            // Expert level
2685            FinancialStatementScheme::FictitiousRevenues
2686            | FinancialStatementScheme::ConcealedLiabilities
2687            | FinancialStatementScheme::ImproperAssetValuations
2688            | FinancialStatementScheme::ImproperDisclosures
2689            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2690            _ => AnomalyDetectionDifficulty::Hard,
2691        }
2692    }
2693
2694    /// Returns all variants for iteration.
2695    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2696        &[
2697            FinancialStatementScheme::PrematureRevenue,
2698            FinancialStatementScheme::DelayedExpenses,
2699            FinancialStatementScheme::FictitiousRevenues,
2700            FinancialStatementScheme::ConcealedLiabilities,
2701            FinancialStatementScheme::ImproperAssetValuations,
2702            FinancialStatementScheme::ImproperDisclosures,
2703            FinancialStatementScheme::ChannelStuffing,
2704            FinancialStatementScheme::BillAndHold,
2705            FinancialStatementScheme::ImproperCapitalization,
2706            FinancialStatementScheme::UnderstatedRevenues,
2707            FinancialStatementScheme::OverstatedExpenses,
2708            FinancialStatementScheme::OverstatedLiabilities,
2709            FinancialStatementScheme::ImproperAssetWritedowns,
2710        ]
2711    }
2712}
2713
2714/// Unified ACFE scheme type that encompasses all fraud schemes.
2715#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2716pub enum AcfeScheme {
2717    /// Cash-based fraud schemes.
2718    Cash(CashFraudScheme),
2719    /// Inventory and other asset fraud schemes.
2720    Asset(AssetFraudScheme),
2721    /// Corruption schemes.
2722    Corruption(CorruptionScheme),
2723    /// Financial statement fraud schemes.
2724    FinancialStatement(FinancialStatementScheme),
2725}
2726
2727impl AcfeScheme {
2728    /// Returns the ACFE category this scheme belongs to.
2729    pub fn category(&self) -> AcfeFraudCategory {
2730        match self {
2731            AcfeScheme::Cash(s) => s.category(),
2732            AcfeScheme::Asset(s) => s.category(),
2733            AcfeScheme::Corruption(s) => s.category(),
2734            AcfeScheme::FinancialStatement(s) => s.category(),
2735        }
2736    }
2737
2738    /// Returns the severity (1-5) for this scheme.
2739    pub fn severity(&self) -> u8 {
2740        match self {
2741            AcfeScheme::Cash(s) => s.severity(),
2742            AcfeScheme::Asset(s) => s.severity(),
2743            AcfeScheme::Corruption(s) => s.severity(),
2744            AcfeScheme::FinancialStatement(s) => s.severity(),
2745        }
2746    }
2747
2748    /// Returns the detection difficulty for this scheme.
2749    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2750        match self {
2751            AcfeScheme::Cash(s) => s.detection_difficulty(),
2752            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2753            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2754            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2755        }
2756    }
2757}
2758
2759/// How a fraud was detected (from ACFE statistics).
2760#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2761pub enum AcfeDetectionMethod {
2762    /// Tip from employee, customer, vendor, or anonymous source.
2763    Tip,
2764    /// Internal audit procedures.
2765    InternalAudit,
2766    /// Management review and oversight.
2767    ManagementReview,
2768    /// External audit procedures.
2769    ExternalAudit,
2770    /// Account reconciliation discrepancies.
2771    AccountReconciliation,
2772    /// Document examination.
2773    DocumentExamination,
2774    /// Discovered by accident.
2775    ByAccident,
2776    /// Automated monitoring/IT controls.
2777    ItControls,
2778    /// Surveillance or investigation.
2779    Surveillance,
2780    /// Confession by perpetrator.
2781    Confession,
2782    /// Law enforcement notification.
2783    LawEnforcement,
2784    /// Other detection method.
2785    Other,
2786}
2787
2788impl AcfeDetectionMethod {
2789    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2790    pub fn typical_detection_rate(&self) -> f64 {
2791        match self {
2792            AcfeDetectionMethod::Tip => 0.42,
2793            AcfeDetectionMethod::InternalAudit => 0.16,
2794            AcfeDetectionMethod::ManagementReview => 0.12,
2795            AcfeDetectionMethod::ExternalAudit => 0.04,
2796            AcfeDetectionMethod::AccountReconciliation => 0.05,
2797            AcfeDetectionMethod::DocumentExamination => 0.04,
2798            AcfeDetectionMethod::ByAccident => 0.06,
2799            AcfeDetectionMethod::ItControls => 0.03,
2800            AcfeDetectionMethod::Surveillance => 0.02,
2801            AcfeDetectionMethod::Confession => 0.02,
2802            AcfeDetectionMethod::LawEnforcement => 0.01,
2803            AcfeDetectionMethod::Other => 0.03,
2804        }
2805    }
2806
2807    /// Returns all variants for iteration.
2808    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2809        &[
2810            AcfeDetectionMethod::Tip,
2811            AcfeDetectionMethod::InternalAudit,
2812            AcfeDetectionMethod::ManagementReview,
2813            AcfeDetectionMethod::ExternalAudit,
2814            AcfeDetectionMethod::AccountReconciliation,
2815            AcfeDetectionMethod::DocumentExamination,
2816            AcfeDetectionMethod::ByAccident,
2817            AcfeDetectionMethod::ItControls,
2818            AcfeDetectionMethod::Surveillance,
2819            AcfeDetectionMethod::Confession,
2820            AcfeDetectionMethod::LawEnforcement,
2821            AcfeDetectionMethod::Other,
2822        ]
2823    }
2824}
2825
2826/// Department/position of perpetrator (from ACFE statistics).
2827#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2828pub enum PerpetratorDepartment {
2829    /// Accounting, finance, or bookkeeping.
2830    Accounting,
2831    /// Operations or manufacturing.
2832    Operations,
2833    /// Executive/upper management.
2834    Executive,
2835    /// Sales.
2836    Sales,
2837    /// Customer service.
2838    CustomerService,
2839    /// Purchasing/procurement.
2840    Purchasing,
2841    /// Information technology.
2842    It,
2843    /// Human resources.
2844    HumanResources,
2845    /// Administrative/clerical.
2846    Administrative,
2847    /// Warehouse/inventory.
2848    Warehouse,
2849    /// Board of directors.
2850    BoardOfDirectors,
2851    /// Other department.
2852    Other,
2853}
2854
2855impl PerpetratorDepartment {
2856    /// Returns the typical percentage of frauds by department (from ACFE reports).
2857    pub fn typical_occurrence_rate(&self) -> f64 {
2858        match self {
2859            PerpetratorDepartment::Accounting => 0.21,
2860            PerpetratorDepartment::Operations => 0.17,
2861            PerpetratorDepartment::Executive => 0.12,
2862            PerpetratorDepartment::Sales => 0.11,
2863            PerpetratorDepartment::CustomerService => 0.07,
2864            PerpetratorDepartment::Purchasing => 0.06,
2865            PerpetratorDepartment::It => 0.05,
2866            PerpetratorDepartment::HumanResources => 0.04,
2867            PerpetratorDepartment::Administrative => 0.04,
2868            PerpetratorDepartment::Warehouse => 0.03,
2869            PerpetratorDepartment::BoardOfDirectors => 0.02,
2870            PerpetratorDepartment::Other => 0.08,
2871        }
2872    }
2873
2874    /// Returns the typical median loss by perpetrator department.
2875    pub fn typical_median_loss(&self) -> Decimal {
2876        match self {
2877            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2878            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2879            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2880            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2881            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2882            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2883            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2884            _ => Decimal::new(80_000, 0),
2885        }
2886    }
2887}
2888
2889/// Perpetrator position level (from ACFE statistics).
2890#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2891pub enum PerpetratorLevel {
2892    /// Entry-level employee.
2893    Employee,
2894    /// Manager or supervisor.
2895    Manager,
2896    /// Owner, executive, or C-level.
2897    OwnerExecutive,
2898}
2899
2900impl PerpetratorLevel {
2901    /// Returns the typical percentage of frauds by position level.
2902    pub fn typical_occurrence_rate(&self) -> f64 {
2903        match self {
2904            PerpetratorLevel::Employee => 0.42,
2905            PerpetratorLevel::Manager => 0.36,
2906            PerpetratorLevel::OwnerExecutive => 0.22,
2907        }
2908    }
2909
2910    /// Returns the typical median loss by position level.
2911    pub fn typical_median_loss(&self) -> Decimal {
2912        match self {
2913            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2914            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2915            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2916        }
2917    }
2918}
2919
2920/// ACFE Calibration data for fraud generation.
2921///
2922/// Contains statistical parameters based on ACFE Report to the Nations
2923/// for realistic fraud pattern generation.
2924#[derive(Debug, Clone, Serialize, Deserialize)]
2925pub struct AcfeCalibration {
2926    /// Overall median loss for occupational fraud ($117,000 typical).
2927    pub median_loss: Decimal,
2928    /// Median duration in months before detection (12 months typical).
2929    pub median_duration_months: u32,
2930    /// Distribution of fraud by category.
2931    pub category_distribution: HashMap<String, f64>,
2932    /// Distribution of detection methods.
2933    pub detection_method_distribution: HashMap<String, f64>,
2934    /// Distribution by perpetrator department.
2935    pub department_distribution: HashMap<String, f64>,
2936    /// Distribution by perpetrator level.
2937    pub level_distribution: HashMap<String, f64>,
2938    /// Average number of red flags per fraud case.
2939    pub avg_red_flags_per_case: f64,
2940    /// Percentage of frauds involving collusion.
2941    pub collusion_rate: f64,
2942}
2943
2944impl Default for AcfeCalibration {
2945    fn default() -> Self {
2946        let mut category_distribution = HashMap::new();
2947        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2948        category_distribution.insert("corruption".to_string(), 0.33);
2949        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2950
2951        let mut detection_method_distribution = HashMap::new();
2952        for method in AcfeDetectionMethod::all_variants() {
2953            detection_method_distribution.insert(
2954                format!("{method:?}").to_lowercase(),
2955                method.typical_detection_rate(),
2956            );
2957        }
2958
2959        let mut department_distribution = HashMap::new();
2960        department_distribution.insert("accounting".to_string(), 0.21);
2961        department_distribution.insert("operations".to_string(), 0.17);
2962        department_distribution.insert("executive".to_string(), 0.12);
2963        department_distribution.insert("sales".to_string(), 0.11);
2964        department_distribution.insert("customer_service".to_string(), 0.07);
2965        department_distribution.insert("purchasing".to_string(), 0.06);
2966        department_distribution.insert("other".to_string(), 0.26);
2967
2968        let mut level_distribution = HashMap::new();
2969        level_distribution.insert("employee".to_string(), 0.42);
2970        level_distribution.insert("manager".to_string(), 0.36);
2971        level_distribution.insert("owner_executive".to_string(), 0.22);
2972
2973        Self {
2974            median_loss: Decimal::new(117_000, 0),
2975            median_duration_months: 12,
2976            category_distribution,
2977            detection_method_distribution,
2978            department_distribution,
2979            level_distribution,
2980            avg_red_flags_per_case: 2.8,
2981            collusion_rate: 0.50,
2982        }
2983    }
2984}
2985
2986impl AcfeCalibration {
2987    /// Creates a new ACFE calibration with the given parameters.
2988    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2989        Self {
2990            median_loss,
2991            median_duration_months,
2992            ..Self::default()
2993        }
2994    }
2995
2996    /// Returns the median loss for a specific category.
2997    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2998        category.typical_median_loss()
2999    }
3000
3001    /// Returns the median duration for a specific category.
3002    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
3003        category.typical_detection_months()
3004    }
3005
3006    /// Validates the calibration data.
3007    pub fn validate(&self) -> Result<(), String> {
3008        if self.median_loss <= Decimal::ZERO {
3009            return Err("Median loss must be positive".to_string());
3010        }
3011        if self.median_duration_months == 0 {
3012            return Err("Median duration must be at least 1 month".to_string());
3013        }
3014        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3015            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3016        }
3017        Ok(())
3018    }
3019}
3020
3021/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
3022///
3023/// The fraud triangle is a model for explaining the factors that cause
3024/// someone to commit occupational fraud.
3025#[derive(Debug, Clone, Serialize, Deserialize)]
3026pub struct FraudTriangle {
3027    /// Pressure or incentive to commit fraud.
3028    pub pressure: PressureType,
3029    /// Opportunity factors that enable fraud.
3030    pub opportunities: Vec<OpportunityFactor>,
3031    /// Rationalization used to justify the fraud.
3032    pub rationalization: Rationalization,
3033}
3034
3035impl FraudTriangle {
3036    /// Creates a new fraud triangle.
3037    pub fn new(
3038        pressure: PressureType,
3039        opportunities: Vec<OpportunityFactor>,
3040        rationalization: Rationalization,
3041    ) -> Self {
3042        Self {
3043            pressure,
3044            opportunities,
3045            rationalization,
3046        }
3047    }
3048
3049    /// Returns a risk score based on the fraud triangle components.
3050    pub fn risk_score(&self) -> f64 {
3051        let pressure_score = self.pressure.risk_weight();
3052        let opportunity_score: f64 = self
3053            .opportunities
3054            .iter()
3055            .map(OpportunityFactor::risk_weight)
3056            .sum::<f64>()
3057            / self.opportunities.len().max(1) as f64;
3058        let rationalization_score = self.rationalization.risk_weight();
3059
3060        (pressure_score + opportunity_score + rationalization_score) / 3.0
3061    }
3062}
3063
3064/// Types of pressure/incentive that can lead to fraud.
3065#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3066pub enum PressureType {
3067    // Financial Pressures
3068    /// Personal financial difficulties (debt, lifestyle beyond means).
3069    PersonalFinancialDifficulties,
3070    /// Pressure to meet financial targets/earnings expectations.
3071    FinancialTargets,
3072    /// Market or analyst expectations.
3073    MarketExpectations,
3074    /// Debt covenant compliance requirements.
3075    CovenantCompliance,
3076    /// Credit rating maintenance.
3077    CreditRatingMaintenance,
3078    /// Acquisition/merger valuation pressure.
3079    AcquisitionValuation,
3080
3081    // Non-Financial Pressures
3082    /// Fear of job loss.
3083    JobSecurity,
3084    /// Pressure to maintain status or image.
3085    StatusMaintenance,
3086    /// Gambling addiction.
3087    GamblingAddiction,
3088    /// Substance abuse issues.
3089    SubstanceAbuse,
3090    /// Family pressure or obligations.
3091    FamilyPressure,
3092    /// Greed or desire for more.
3093    Greed,
3094}
3095
3096impl PressureType {
3097    /// Returns the risk weight (0.0-1.0) for this pressure type.
3098    pub fn risk_weight(&self) -> f64 {
3099        match self {
3100            PressureType::PersonalFinancialDifficulties => 0.80,
3101            PressureType::FinancialTargets => 0.75,
3102            PressureType::MarketExpectations => 0.70,
3103            PressureType::CovenantCompliance => 0.85,
3104            PressureType::CreditRatingMaintenance => 0.70,
3105            PressureType::AcquisitionValuation => 0.75,
3106            PressureType::JobSecurity => 0.65,
3107            PressureType::StatusMaintenance => 0.55,
3108            PressureType::GamblingAddiction => 0.90,
3109            PressureType::SubstanceAbuse => 0.85,
3110            PressureType::FamilyPressure => 0.60,
3111            PressureType::Greed => 0.70,
3112        }
3113    }
3114}
3115
3116/// Opportunity factors that enable fraud.
3117#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3118pub enum OpportunityFactor {
3119    /// Weak internal controls.
3120    WeakInternalControls,
3121    /// Lack of segregation of duties.
3122    LackOfSegregation,
3123    /// Override capability.
3124    ManagementOverride,
3125    /// Complex or unusual transactions.
3126    ComplexTransactions,
3127    /// Related party transactions.
3128    RelatedPartyTransactions,
3129    /// Poor tone at the top.
3130    PoorToneAtTop,
3131    /// Inadequate supervision.
3132    InadequateSupervision,
3133    /// Access to assets without accountability.
3134    AssetAccess,
3135    /// Inadequate record keeping.
3136    PoorRecordKeeping,
3137    /// Failure to discipline fraud perpetrators.
3138    LackOfDiscipline,
3139    /// Lack of independent checks.
3140    LackOfIndependentChecks,
3141}
3142
3143impl OpportunityFactor {
3144    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3145    pub fn risk_weight(&self) -> f64 {
3146        match self {
3147            OpportunityFactor::WeakInternalControls => 0.85,
3148            OpportunityFactor::LackOfSegregation => 0.80,
3149            OpportunityFactor::ManagementOverride => 0.90,
3150            OpportunityFactor::ComplexTransactions => 0.70,
3151            OpportunityFactor::RelatedPartyTransactions => 0.75,
3152            OpportunityFactor::PoorToneAtTop => 0.85,
3153            OpportunityFactor::InadequateSupervision => 0.75,
3154            OpportunityFactor::AssetAccess => 0.70,
3155            OpportunityFactor::PoorRecordKeeping => 0.65,
3156            OpportunityFactor::LackOfDiscipline => 0.60,
3157            OpportunityFactor::LackOfIndependentChecks => 0.75,
3158        }
3159    }
3160}
3161
3162/// Rationalizations used by fraud perpetrators.
3163#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3164pub enum Rationalization {
3165    /// "I'm just borrowing; I'll pay it back."
3166    TemporaryBorrowing,
3167    /// "Everyone does it."
3168    EveryoneDoesIt,
3169    /// "It's for the good of the company."
3170    ForTheCompanyGood,
3171    /// "I deserve this; the company owes me."
3172    Entitlement,
3173    /// "I was just following orders."
3174    FollowingOrders,
3175    /// "They won't miss it; they have plenty."
3176    TheyWontMissIt,
3177    /// "I need it more than they do."
3178    NeedItMore,
3179    /// "It's not really stealing."
3180    NotReallyStealing,
3181    /// "I'm underpaid for what I do."
3182    Underpaid,
3183    /// "It's a victimless crime."
3184    VictimlessCrime,
3185}
3186
3187impl Rationalization {
3188    /// Returns the risk weight (0.0-1.0) for this rationalization.
3189    pub fn risk_weight(&self) -> f64 {
3190        match self {
3191            // More dangerous rationalizations
3192            Rationalization::Entitlement => 0.85,
3193            Rationalization::EveryoneDoesIt => 0.80,
3194            Rationalization::NotReallyStealing => 0.80,
3195            Rationalization::TheyWontMissIt => 0.75,
3196            // Medium risk
3197            Rationalization::Underpaid => 0.70,
3198            Rationalization::ForTheCompanyGood => 0.65,
3199            Rationalization::NeedItMore => 0.65,
3200            // Lower risk (still indicates fraud)
3201            Rationalization::TemporaryBorrowing => 0.60,
3202            Rationalization::FollowingOrders => 0.55,
3203            Rationalization::VictimlessCrime => 0.60,
3204        }
3205    }
3206}
3207
3208// ============================================================================
3209// NEAR-MISS TYPES
3210// ============================================================================
3211
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Each variant carries the details that describe that particular kind of
/// near-miss.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Date difference, in days, from the similar transaction.
        date_difference_days: u32,
        /// ID of the original transaction this one resembles.
        similar_transaction_id: String,
    },
    /// Amount just below an approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Proportion of the threshold reached, on a 0.0-1.0 scale.
        /// The range is not enforced by this type.
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Which kind of legitimate pattern this is.
        pattern_type: LegitimatePatternType,
        /// Business justification for the pattern.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Number of days until the correction.
        correction_lag_days: u32,
        /// ID of the correcting document.
        correction_document_id: String,
    },
}
3244
/// Types of unusual but legitimate business patterns.
///
/// Used as the `pattern_type` of [`NearMissPattern::UnusualLegitimate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3265
3266impl LegitimatePatternType {
3267    /// Returns a description of this pattern type.
3268    pub fn description(&self) -> &'static str {
3269        match self {
3270            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3271            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3272            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3273            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3274            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3275            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3276            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3277            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3278        }
3279    }
3280}
3281
/// What might trigger a false positive for this near-miss.
///
/// Stored on [`NearMissLabel`] as `false_positive_trigger`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FalsePositiveTrigger {
    /// Amount is near a threshold.
    AmountNearThreshold,
    /// Timing is unusual.
    UnusualTiming,
    /// Similar to an existing transaction.
    SimilarTransaction,
    /// New counterparty.
    NewCounterparty,
    /// Unusual account combination.
    UnusualAccountCombination,
    /// Volume spike.
    VolumeSpike,
    /// Round amount.
    RoundAmount,
}
3300
/// Label for a near-miss case.
///
/// A near-miss looks suspicious but is legitimate; the label records the
/// pattern, how suspicious it appears, and why it is actually legitimate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearMissLabel {
    /// Document ID.
    pub document_id: String,
    /// The near-miss pattern.
    pub pattern: NearMissPattern,
    /// How suspicious it appears (0.0-1.0). `NearMissLabel::new` clamps the
    /// value into this range; the field itself is public and unchecked.
    pub suspicion_score: f64,
    /// What would trigger a false positive.
    pub false_positive_trigger: FalsePositiveTrigger,
    /// Why this is actually legitimate.
    pub explanation: String,
}
3315
3316impl NearMissLabel {
3317    /// Creates a new near-miss label.
3318    pub fn new(
3319        document_id: impl Into<String>,
3320        pattern: NearMissPattern,
3321        suspicion_score: f64,
3322        trigger: FalsePositiveTrigger,
3323        explanation: impl Into<String>,
3324    ) -> Self {
3325        Self {
3326            document_id: document_id.into(),
3327            pattern,
3328            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3329            false_positive_trigger: trigger,
3330            explanation: explanation.into(),
3331        }
3332    }
3333}
3334
/// Configuration for anomaly rates.
///
/// `total_rate` is the overall share of anomalies; the remaining fields are
/// per-category proportions of those anomalies. `validate` checks that the
/// category proportions sum to approximately 1.0 and that `total_rate` lies
/// in 0.0-1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate (0.0 - 1.0).
    pub total_rate: f64,
    /// Fraud rate as proportion of anomalies.
    pub fraud_rate: f64,
    /// Error rate as proportion of anomalies.
    pub error_rate: f64,
    /// Process issue rate as proportion of anomalies.
    pub process_issue_rate: f64,
    /// Statistical anomaly rate as proportion of anomalies.
    pub statistical_rate: f64,
    /// Relational anomaly rate as proportion of anomalies.
    pub relational_rate: f64,
}
3351
3352impl Default for AnomalyRateConfig {
3353    fn default() -> Self {
3354        Self {
3355            total_rate: 0.02,         // 2% of transactions are anomalous
3356            fraud_rate: 0.25,         // 25% of anomalies are fraud
3357            error_rate: 0.35,         // 35% of anomalies are errors
3358            process_issue_rate: 0.20, // 20% are process issues
3359            statistical_rate: 0.15,   // 15% are statistical
3360            relational_rate: 0.05,    // 5% are relational
3361        }
3362    }
3363}
3364
3365impl AnomalyRateConfig {
3366    /// Validates that rates sum to approximately 1.0.
3367    pub fn validate(&self) -> Result<(), String> {
3368        let sum = self.fraud_rate
3369            + self.error_rate
3370            + self.process_issue_rate
3371            + self.statistical_rate
3372            + self.relational_rate;
3373
3374        if (sum - 1.0).abs() > 0.01 {
3375            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3376        }
3377
3378        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3379            return Err(format!(
3380                "Total rate must be between 0.0 and 1.0, got {}",
3381                self.total_rate
3382            ));
3383        }
3384
3385        Ok(())
3386    }
3387}
3388
3389#[cfg(test)]
3390mod tests {
3391    use super::*;
3392    use rust_decimal_macros::dec;
3393
3394    #[test]
3395    fn test_anomaly_type_category() {
3396        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3397        assert_eq!(fraud.category(), "Fraud");
3398        assert!(fraud.is_intentional());
3399
3400        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3401        assert_eq!(error.category(), "Error");
3402        assert!(!error.is_intentional());
3403    }
3404
3405    #[test]
3406    fn test_labeled_anomaly() {
3407        let anomaly = LabeledAnomaly::new(
3408            "ANO001".to_string(),
3409            AnomalyType::Fraud(FraudType::SelfApproval),
3410            "JE001".to_string(),
3411            "JE".to_string(),
3412            "1000".to_string(),
3413            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3414        )
3415        .with_description("User approved their own expense report")
3416        .with_related_entity("USER001");
3417
3418        assert_eq!(anomaly.severity, 3);
3419        assert!(anomaly.is_injected);
3420        assert_eq!(anomaly.related_entities.len(), 1);
3421    }
3422
3423    #[test]
3424    fn test_labeled_anomaly_with_provenance() {
3425        let anomaly = LabeledAnomaly::new(
3426            "ANO001".to_string(),
3427            AnomalyType::Fraud(FraudType::SelfApproval),
3428            "JE001".to_string(),
3429            "JE".to_string(),
3430            "1000".to_string(),
3431            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3432        )
3433        .with_run_id("run-123")
3434        .with_generation_seed(42)
3435        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3436        .with_structured_strategy(InjectionStrategy::SelfApproval {
3437            user_id: "USER001".to_string(),
3438        })
3439        .with_scenario("scenario-001")
3440        .with_original_document_hash("abc123");
3441
3442        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3443        assert_eq!(anomaly.generation_seed, Some(42));
3444        assert!(anomaly.causal_reason.is_some());
3445        assert!(anomaly.structured_strategy.is_some());
3446        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3447        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3448
3449        // Check that legacy injection_strategy is also set
3450        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3451    }
3452
3453    #[test]
3454    fn test_labeled_anomaly_derivation_chain() {
3455        let parent = LabeledAnomaly::new(
3456            "ANO001".to_string(),
3457            AnomalyType::Fraud(FraudType::DuplicatePayment),
3458            "JE001".to_string(),
3459            "JE".to_string(),
3460            "1000".to_string(),
3461            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3462        );
3463
3464        let child = LabeledAnomaly::new(
3465            "ANO002".to_string(),
3466            AnomalyType::Error(ErrorType::DuplicateEntry),
3467            "JE002".to_string(),
3468            "JE".to_string(),
3469            "1000".to_string(),
3470            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3471        )
3472        .with_parent_anomaly(&parent.anomaly_id);
3473
3474        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3475    }
3476
3477    #[test]
3478    fn test_injection_strategy_description() {
3479        let strategy = InjectionStrategy::AmountManipulation {
3480            original: dec!(1000),
3481            factor: 2.5,
3482        };
3483        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3484        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3485
3486        let strategy = InjectionStrategy::ThresholdAvoidance {
3487            threshold: dec!(10000),
3488            adjusted_amount: dec!(9999),
3489        };
3490        assert_eq!(
3491            strategy.description(),
3492            "Amount adjusted to avoid 10000 threshold"
3493        );
3494
3495        let strategy = InjectionStrategy::DateShift {
3496            days_shifted: -5,
3497            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3498        };
3499        assert_eq!(strategy.description(), "Date backdated by 5 days");
3500
3501        let strategy = InjectionStrategy::DateShift {
3502            days_shifted: 3,
3503            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3504        };
3505        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3506    }
3507
3508    #[test]
3509    fn test_causal_reason_variants() {
3510        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3511        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3512            assert!((base_rate - 0.02).abs() < 0.001);
3513        }
3514
3515        let reason = AnomalyCausalReason::TemporalPattern {
3516            pattern_name: "year_end_spike".to_string(),
3517        };
3518        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3519            assert_eq!(pattern_name, "year_end_spike");
3520        }
3521
3522        let reason = AnomalyCausalReason::ScenarioStep {
3523            scenario_type: "kickback".to_string(),
3524            step_number: 3,
3525        };
3526        if let AnomalyCausalReason::ScenarioStep {
3527            scenario_type,
3528            step_number,
3529        } = reason
3530        {
3531            assert_eq!(scenario_type, "kickback");
3532            assert_eq!(step_number, 3);
3533        }
3534    }
3535
3536    #[test]
3537    fn test_feature_vector_length() {
3538        let anomaly = LabeledAnomaly::new(
3539            "ANO001".to_string(),
3540            AnomalyType::Fraud(FraudType::SelfApproval),
3541            "JE001".to_string(),
3542            "JE".to_string(),
3543            "1000".to_string(),
3544            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3545        );
3546
3547        let features = anomaly.to_features();
3548        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3549        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3550    }
3551
3552    #[test]
3553    fn test_feature_vector_with_provenance() {
3554        let anomaly = LabeledAnomaly::new(
3555            "ANO001".to_string(),
3556            AnomalyType::Fraud(FraudType::SelfApproval),
3557            "JE001".to_string(),
3558            "JE".to_string(),
3559            "1000".to_string(),
3560            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3561        )
3562        .with_scenario("scenario-001")
3563        .with_parent_anomaly("ANO000");
3564
3565        let features = anomaly.to_features();
3566
3567        // Last two features should be 1.0 (has scenario, has parent)
3568        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3569        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3570    }
3571
3572    #[test]
3573    fn test_anomaly_summary() {
3574        let anomalies = vec![
3575            LabeledAnomaly::new(
3576                "ANO001".to_string(),
3577                AnomalyType::Fraud(FraudType::SelfApproval),
3578                "JE001".to_string(),
3579                "JE".to_string(),
3580                "1000".to_string(),
3581                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3582            ),
3583            LabeledAnomaly::new(
3584                "ANO002".to_string(),
3585                AnomalyType::Error(ErrorType::DuplicateEntry),
3586                "JE002".to_string(),
3587                "JE".to_string(),
3588                "1000".to_string(),
3589                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3590            ),
3591        ];
3592
3593        let summary = AnomalySummary::from_anomalies(&anomalies);
3594
3595        assert_eq!(summary.total_count, 2);
3596        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3597        assert_eq!(summary.by_category.get("Error"), Some(&1));
3598    }
3599
3600    #[test]
3601    fn test_rate_config_validation() {
3602        let config = AnomalyRateConfig::default();
3603        assert!(config.validate().is_ok());
3604
3605        let bad_config = AnomalyRateConfig {
3606            fraud_rate: 0.5,
3607            error_rate: 0.5,
3608            process_issue_rate: 0.5, // Sum > 1.0
3609            ..Default::default()
3610        };
3611        assert!(bad_config.validate().is_err());
3612    }
3613
3614    #[test]
3615    fn test_injection_strategy_serialization() {
3616        let strategy = InjectionStrategy::SoDViolation {
3617            duty1: "CreatePO".to_string(),
3618            duty2: "ApprovePO".to_string(),
3619            violating_user: "USER001".to_string(),
3620        };
3621
3622        let json = serde_json::to_string(&strategy).unwrap();
3623        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3624
3625        assert_eq!(strategy, deserialized);
3626    }
3627
3628    #[test]
3629    fn test_labeled_anomaly_serialization_with_provenance() {
3630        let anomaly = LabeledAnomaly::new(
3631            "ANO001".to_string(),
3632            AnomalyType::Fraud(FraudType::SelfApproval),
3633            "JE001".to_string(),
3634            "JE".to_string(),
3635            "1000".to_string(),
3636            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3637        )
3638        .with_run_id("run-123")
3639        .with_generation_seed(42)
3640        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3641
3642        let json = serde_json::to_string(&anomaly).unwrap();
3643        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3644
3645        assert_eq!(anomaly.run_id, deserialized.run_id);
3646        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3647    }
3648
3649    // ========================================
3650    // FR-003 ENHANCED TAXONOMY TESTS
3651    // ========================================
3652
3653    #[test]
3654    fn test_anomaly_category_from_anomaly_type() {
3655        // Fraud mappings
3656        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3657        assert_eq!(
3658            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3659            AnomalyCategory::FictitiousVendor
3660        );
3661
3662        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3663        assert_eq!(
3664            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3665            AnomalyCategory::VendorKickback
3666        );
3667
3668        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3669        assert_eq!(
3670            AnomalyCategory::from_anomaly_type(&fraud_structured),
3671            AnomalyCategory::StructuredTransaction
3672        );
3673
3674        // Error mappings
3675        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3676        assert_eq!(
3677            AnomalyCategory::from_anomaly_type(&error_duplicate),
3678            AnomalyCategory::DuplicatePayment
3679        );
3680
3681        // Process issue mappings
3682        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3683        assert_eq!(
3684            AnomalyCategory::from_anomaly_type(&process_skip),
3685            AnomalyCategory::MissingApproval
3686        );
3687
3688        // Relational mappings
3689        let relational_circular =
3690            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3691        assert_eq!(
3692            AnomalyCategory::from_anomaly_type(&relational_circular),
3693            AnomalyCategory::CircularFlow
3694        );
3695    }
3696
3697    #[test]
3698    fn test_anomaly_category_ordinal() {
3699        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3700        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3701        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3702    }
3703
3704    #[test]
3705    fn test_contributing_factor() {
3706        let factor = ContributingFactor::new(
3707            FactorType::AmountDeviation,
3708            15000.0,
3709            10000.0,
3710            true,
3711            0.5,
3712            "Amount exceeds threshold",
3713        );
3714
3715        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3716        assert_eq!(factor.value, 15000.0);
3717        assert_eq!(factor.threshold, 10000.0);
3718        assert!(factor.direction_greater);
3719
3720        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3721        let contribution = factor.contribution();
3722        assert!((contribution - 0.25).abs() < 0.01);
3723    }
3724
3725    #[test]
3726    fn test_contributing_factor_with_evidence() {
3727        let mut data = HashMap::new();
3728        data.insert("expected".to_string(), "10000".to_string());
3729        data.insert("actual".to_string(), "15000".to_string());
3730
3731        let factor = ContributingFactor::new(
3732            FactorType::AmountDeviation,
3733            15000.0,
3734            10000.0,
3735            true,
3736            0.5,
3737            "Amount deviation detected",
3738        )
3739        .with_evidence("transaction_history", data);
3740
3741        assert!(factor.evidence.is_some());
3742        let evidence = factor.evidence.unwrap();
3743        assert_eq!(evidence.source, "transaction_history");
3744        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3745    }
3746
3747    #[test]
3748    fn test_enhanced_anomaly_label() {
3749        let base = LabeledAnomaly::new(
3750            "ANO001".to_string(),
3751            AnomalyType::Fraud(FraudType::DuplicatePayment),
3752            "JE001".to_string(),
3753            "JE".to_string(),
3754            "1000".to_string(),
3755            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3756        );
3757
3758        let enhanced = EnhancedAnomalyLabel::from_base(base)
3759            .with_confidence(0.85)
3760            .with_severity(0.7)
3761            .with_factor(ContributingFactor::new(
3762                FactorType::DuplicateIndicator,
3763                1.0,
3764                0.5,
3765                true,
3766                0.4,
3767                "Duplicate payment detected",
3768            ))
3769            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3770
3771        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3772        assert_eq!(enhanced.enhanced_confidence, 0.85);
3773        assert_eq!(enhanced.enhanced_severity, 0.7);
3774        assert_eq!(enhanced.contributing_factors.len(), 1);
3775        assert_eq!(enhanced.secondary_categories.len(), 1);
3776    }
3777
3778    #[test]
3779    fn test_enhanced_anomaly_label_features() {
3780        let base = LabeledAnomaly::new(
3781            "ANO001".to_string(),
3782            AnomalyType::Fraud(FraudType::SelfApproval),
3783            "JE001".to_string(),
3784            "JE".to_string(),
3785            "1000".to_string(),
3786            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3787        );
3788
3789        let enhanced = EnhancedAnomalyLabel::from_base(base)
3790            .with_confidence(0.9)
3791            .with_severity(0.8)
3792            .with_factor(ContributingFactor::new(
3793                FactorType::ControlBypass,
3794                1.0,
3795                0.0,
3796                true,
3797                0.5,
3798                "Control bypass detected",
3799            ));
3800
3801        let features = enhanced.to_features();
3802
3803        // Should have 25 features (15 base + 10 enhanced)
3804        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3805        assert_eq!(features.len(), 25);
3806
3807        // Check enhanced confidence is in features
3808        assert_eq!(features[15], 0.9); // enhanced_confidence
3809
3810        // Check has_control_bypass flag
3811        assert_eq!(features[21], 1.0); // has_control_bypass
3812    }
3813
3814    #[test]
3815    fn test_enhanced_anomaly_label_feature_names() {
3816        let names = EnhancedAnomalyLabel::feature_names();
3817        assert_eq!(names.len(), 25);
3818        assert!(names.contains(&"enhanced_confidence"));
3819        assert!(names.contains(&"enhanced_severity"));
3820        assert!(names.contains(&"has_control_bypass"));
3821    }
3822
3823    #[test]
3824    fn test_factor_type_names() {
3825        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3826        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3827        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3828    }
3829
3830    #[test]
3831    fn test_anomaly_category_serialization() {
3832        let category = AnomalyCategory::CircularFlow;
3833        let json = serde_json::to_string(&category).unwrap();
3834        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3835        assert_eq!(category, deserialized);
3836
3837        let custom = AnomalyCategory::Custom("custom_type".to_string());
3838        let json = serde_json::to_string(&custom).unwrap();
3839        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3840        assert_eq!(custom, deserialized);
3841    }
3842
3843    #[test]
3844    fn test_enhanced_label_secondary_category_dedup() {
3845        let base = LabeledAnomaly::new(
3846            "ANO001".to_string(),
3847            AnomalyType::Fraud(FraudType::DuplicatePayment),
3848            "JE001".to_string(),
3849            "JE".to_string(),
3850            "1000".to_string(),
3851            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3852        );
3853
3854        let enhanced = EnhancedAnomalyLabel::from_base(base)
3855            // Try to add the primary category as secondary (should be ignored)
3856            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3857            // Add a valid secondary
3858            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3859            // Try to add duplicate secondary (should be ignored)
3860            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3861
3862        // Should only have 1 secondary category (TimingAnomaly)
3863        assert_eq!(enhanced.secondary_categories.len(), 1);
3864        assert_eq!(
3865            enhanced.secondary_categories[0],
3866            AnomalyCategory::TimingAnomaly
3867        );
3868    }
3869
3870    // ==========================================================================
3871    // Accounting Standards Fraud Type Tests
3872    // ==========================================================================
3873
3874    #[test]
3875    fn test_revenue_recognition_fraud_types() {
3876        // Test ASC 606/IFRS 15 related fraud types
3877        let fraud_types = [
3878            FraudType::ImproperRevenueRecognition,
3879            FraudType::ImproperPoAllocation,
3880            FraudType::VariableConsiderationManipulation,
3881            FraudType::ContractModificationMisstatement,
3882        ];
3883
3884        for fraud_type in fraud_types {
3885            let anomaly_type = AnomalyType::Fraud(fraud_type);
3886            assert_eq!(anomaly_type.category(), "Fraud");
3887            assert!(anomaly_type.is_intentional());
3888            assert!(anomaly_type.severity() >= 3);
3889        }
3890    }
3891
3892    #[test]
3893    fn test_lease_accounting_fraud_types() {
3894        // Test ASC 842/IFRS 16 related fraud types
3895        let fraud_types = [
3896            FraudType::LeaseClassificationManipulation,
3897            FraudType::OffBalanceSheetLease,
3898            FraudType::LeaseLiabilityUnderstatement,
3899            FraudType::RouAssetMisstatement,
3900        ];
3901
3902        for fraud_type in fraud_types {
3903            let anomaly_type = AnomalyType::Fraud(fraud_type);
3904            assert_eq!(anomaly_type.category(), "Fraud");
3905            assert!(anomaly_type.is_intentional());
3906            assert!(anomaly_type.severity() >= 3);
3907        }
3908
3909        // Off-balance sheet lease fraud should be high severity
3910        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3911    }
3912
3913    #[test]
3914    fn test_fair_value_fraud_types() {
3915        // Test ASC 820/IFRS 13 related fraud types
3916        let fraud_types = [
3917            FraudType::FairValueHierarchyManipulation,
3918            FraudType::Level3InputManipulation,
3919            FraudType::ValuationTechniqueManipulation,
3920        ];
3921
3922        for fraud_type in fraud_types {
3923            let anomaly_type = AnomalyType::Fraud(fraud_type);
3924            assert_eq!(anomaly_type.category(), "Fraud");
3925            assert!(anomaly_type.is_intentional());
3926            assert!(anomaly_type.severity() >= 4);
3927        }
3928
3929        // Level 3 manipulation is highest severity (unobservable inputs)
3930        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3931    }
3932
3933    #[test]
3934    fn test_impairment_fraud_types() {
3935        // Test ASC 360/IAS 36 related fraud types
3936        let fraud_types = [
3937            FraudType::DelayedImpairment,
3938            FraudType::ImpairmentTestAvoidance,
3939            FraudType::CashFlowProjectionManipulation,
3940            FraudType::ImproperImpairmentReversal,
3941        ];
3942
3943        for fraud_type in fraud_types {
3944            let anomaly_type = AnomalyType::Fraud(fraud_type);
3945            assert_eq!(anomaly_type.category(), "Fraud");
3946            assert!(anomaly_type.is_intentional());
3947            assert!(anomaly_type.severity() >= 3);
3948        }
3949
3950        // Cash flow manipulation has highest severity
3951        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3952    }
3953
3954    // ==========================================================================
3955    // Accounting Standards Error Type Tests
3956    // ==========================================================================
3957
3958    #[test]
3959    fn test_standards_error_types() {
3960        // Test non-fraudulent accounting standards errors
3961        let error_types = [
3962            ErrorType::RevenueTimingError,
3963            ErrorType::PoAllocationError,
3964            ErrorType::LeaseClassificationError,
3965            ErrorType::LeaseCalculationError,
3966            ErrorType::FairValueError,
3967            ErrorType::ImpairmentCalculationError,
3968            ErrorType::DiscountRateError,
3969            ErrorType::FrameworkApplicationError,
3970        ];
3971
3972        for error_type in error_types {
3973            let anomaly_type = AnomalyType::Error(error_type);
3974            assert_eq!(anomaly_type.category(), "Error");
3975            assert!(!anomaly_type.is_intentional());
3976            assert!(anomaly_type.severity() >= 3);
3977        }
3978    }
3979
3980    #[test]
3981    fn test_framework_application_error() {
3982        // Test IFRS vs GAAP confusion errors
3983        let error_type = ErrorType::FrameworkApplicationError;
3984        assert_eq!(error_type.severity(), 4);
3985
3986        let anomaly = LabeledAnomaly::new(
3987            "ERR001".to_string(),
3988            AnomalyType::Error(error_type),
3989            "JE100".to_string(),
3990            "JE".to_string(),
3991            "1000".to_string(),
3992            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3993        )
3994        .with_description("LIFO inventory method used under IFRS (not permitted)")
3995        .with_metadata("framework", "IFRS")
3996        .with_metadata("standard_violated", "IAS 2");
3997
3998        assert_eq!(anomaly.anomaly_type.category(), "Error");
3999        assert_eq!(
4000            anomaly.metadata.get("standard_violated"),
4001            Some(&"IAS 2".to_string())
4002        );
4003    }
4004
4005    #[test]
4006    fn test_standards_anomaly_serialization() {
4007        // Test that new fraud types serialize/deserialize correctly
4008        let fraud_types = [
4009            FraudType::ImproperRevenueRecognition,
4010            FraudType::LeaseClassificationManipulation,
4011            FraudType::FairValueHierarchyManipulation,
4012            FraudType::DelayedImpairment,
4013        ];
4014
4015        for fraud_type in fraud_types {
4016            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4017            let deserialized: FraudType =
4018                serde_json::from_str(&json).expect("Failed to deserialize");
4019            assert_eq!(fraud_type, deserialized);
4020        }
4021
4022        // Test error types
4023        let error_types = [
4024            ErrorType::RevenueTimingError,
4025            ErrorType::LeaseCalculationError,
4026            ErrorType::FairValueError,
4027            ErrorType::FrameworkApplicationError,
4028        ];
4029
4030        for error_type in error_types {
4031            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4032            let deserialized: ErrorType =
4033                serde_json::from_str(&json).expect("Failed to deserialize");
4034            assert_eq!(error_type, deserialized);
4035        }
4036    }
4037
4038    #[test]
4039    fn test_standards_labeled_anomaly() {
4040        // Test creating a labeled anomaly for a standards violation
4041        let anomaly = LabeledAnomaly::new(
4042            "STD001".to_string(),
4043            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4044            "CONTRACT-2024-001".to_string(),
4045            "Revenue".to_string(),
4046            "1000".to_string(),
4047            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4048        )
4049        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4050        .with_monetary_impact(dec!(500000))
4051        .with_metadata("standard", "ASC 606")
4052        .with_metadata("paragraph", "606-10-25-1")
4053        .with_metadata("contract_id", "C-2024-001")
4054        .with_related_entity("CONTRACT-2024-001")
4055        .with_related_entity("CUSTOMER-500");
4056
4057        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4058        assert!(anomaly.is_injected);
4059        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4060        assert_eq!(anomaly.related_entities.len(), 2);
4061        assert_eq!(
4062            anomaly.metadata.get("standard"),
4063            Some(&"ASC 606".to_string())
4064        );
4065    }
4066
4067    // ==========================================================================
4068    // Multi-Dimensional Labeling Tests
4069    // ==========================================================================
4070
4071    #[test]
4072    fn test_severity_level() {
4073        assert_eq!(SeverityLevel::Low.numeric(), 1);
4074        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4075
4076        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4077        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4078
4079        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4080        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4081
4082        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4083    }
4084
4085    #[test]
4086    fn test_anomaly_severity() {
4087        let severity =
4088            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4089
4090        assert_eq!(severity.level, SeverityLevel::High);
4091        assert!(severity.is_material);
4092        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4093
4094        // Not material
4095        let low_severity =
4096            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4097        assert!(!low_severity.is_material);
4098    }
4099
4100    #[test]
4101    fn test_detection_difficulty() {
4102        assert!(
4103            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4104        );
4105        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4106
4107        assert_eq!(
4108            AnomalyDetectionDifficulty::from_score(0.05),
4109            AnomalyDetectionDifficulty::Trivial
4110        );
4111        assert_eq!(
4112            AnomalyDetectionDifficulty::from_score(0.90),
4113            AnomalyDetectionDifficulty::Expert
4114        );
4115
4116        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4117    }
4118
4119    #[test]
4120    fn test_ground_truth_certainty() {
4121        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4122        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4123        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4124    }
4125
4126    #[test]
4127    fn test_detection_method() {
4128        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4129        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4130    }
4131
4132    #[test]
4133    fn test_extended_anomaly_label() {
4134        let base = LabeledAnomaly::new(
4135            "ANO001".to_string(),
4136            AnomalyType::Fraud(FraudType::FictitiousVendor),
4137            "JE001".to_string(),
4138            "JE".to_string(),
4139            "1000".to_string(),
4140            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4141        )
4142        .with_monetary_impact(dec!(100000));
4143
4144        let extended = ExtendedAnomalyLabel::from_base(base)
4145            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4146            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4147            .with_method(DetectionMethod::GraphBased)
4148            .with_method(DetectionMethod::ForensicAudit)
4149            .with_indicator("New vendor with no history")
4150            .with_indicator("Large first transaction")
4151            .with_certainty(GroundTruthCertainty::Definite)
4152            .with_entity("V001")
4153            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4154            .with_scheme("SCHEME001", 2);
4155
4156        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4157        assert_eq!(
4158            extended.detection_difficulty,
4159            AnomalyDetectionDifficulty::Hard
4160        );
4161        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4162        assert_eq!(extended.recommended_methods.len(), 3);
4163        assert_eq!(extended.key_indicators.len(), 2);
4164        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4165        assert_eq!(extended.scheme_stage, Some(2));
4166    }
4167
4168    #[test]
4169    fn test_extended_anomaly_label_features() {
4170        let base = LabeledAnomaly::new(
4171            "ANO001".to_string(),
4172            AnomalyType::Fraud(FraudType::SelfApproval),
4173            "JE001".to_string(),
4174            "JE".to_string(),
4175            "1000".to_string(),
4176            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4177        );
4178
4179        let extended =
4180            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4181
4182        let features = extended.to_features();
4183        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4184        assert_eq!(features.len(), 30);
4185
4186        // Check difficulty score is in features
4187        let difficulty_idx = 18; // Position of difficulty_score
4188        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4189    }
4190
4191    #[test]
4192    fn test_extended_label_near_miss() {
4193        let base = LabeledAnomaly::new(
4194            "ANO001".to_string(),
4195            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4196            "JE001".to_string(),
4197            "JE".to_string(),
4198            "1000".to_string(),
4199            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4200        );
4201
4202        let extended = ExtendedAnomalyLabel::from_base(base)
4203            .as_near_miss("Year-end bonus payment, legitimately high");
4204
4205        assert!(extended.is_near_miss);
4206        assert!(extended.near_miss_explanation.is_some());
4207    }
4208
4209    #[test]
4210    fn test_scheme_type() {
4211        assert_eq!(
4212            SchemeType::GradualEmbezzlement.name(),
4213            "gradual_embezzlement"
4214        );
4215        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4216        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4217    }
4218
4219    #[test]
4220    fn test_concealment_technique() {
4221        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4222        assert!(
4223            ConcealmentTechnique::Collusion.difficulty_bonus()
4224                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4225        );
4226    }
4227
4228    #[test]
4229    fn test_near_miss_label() {
4230        let near_miss = NearMissLabel::new(
4231            "JE001",
4232            NearMissPattern::ThresholdProximity {
4233                threshold: dec!(10000),
4234                proximity: 0.95,
4235            },
4236            0.7,
4237            FalsePositiveTrigger::AmountNearThreshold,
4238            "Transaction is 95% of threshold but business justified",
4239        );
4240
4241        assert_eq!(near_miss.document_id, "JE001");
4242        assert_eq!(near_miss.suspicion_score, 0.7);
4243        assert_eq!(
4244            near_miss.false_positive_trigger,
4245            FalsePositiveTrigger::AmountNearThreshold
4246        );
4247    }
4248
4249    #[test]
4250    fn test_legitimate_pattern_type() {
4251        assert_eq!(
4252            LegitimatePatternType::YearEndBonus.description(),
4253            "Year-end bonus payment"
4254        );
4255        assert_eq!(
4256            LegitimatePatternType::InsuranceClaim.description(),
4257            "Insurance claim reimbursement"
4258        );
4259    }
4260
4261    #[test]
4262    fn test_severity_detection_difficulty_serialization() {
4263        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4264        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4265        let deserialized: AnomalySeverity =
4266            serde_json::from_str(&json).expect("Failed to deserialize");
4267        assert_eq!(severity.level, deserialized.level);
4268
4269        let difficulty = AnomalyDetectionDifficulty::Hard;
4270        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4271        let deserialized: AnomalyDetectionDifficulty =
4272            serde_json::from_str(&json).expect("Failed to deserialize");
4273        assert_eq!(difficulty, deserialized);
4274    }
4275
4276    // ========================================
4277    // ACFE Taxonomy Tests
4278    // ========================================
4279
4280    #[test]
4281    fn test_acfe_fraud_category() {
4282        let asset = AcfeFraudCategory::AssetMisappropriation;
4283        assert_eq!(asset.name(), "asset_misappropriation");
4284        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4285        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4286        assert_eq!(asset.typical_detection_months(), 12);
4287
4288        let corruption = AcfeFraudCategory::Corruption;
4289        assert_eq!(corruption.name(), "corruption");
4290        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4291
4292        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4293        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4294        assert_eq!(fs_fraud.typical_detection_months(), 24);
4295    }
4296
4297    #[test]
4298    fn test_cash_fraud_scheme() {
4299        let shell = CashFraudScheme::ShellCompany;
4300        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4301        assert_eq!(shell.subcategory(), "billing_schemes");
4302        assert_eq!(shell.severity(), 5);
4303        assert_eq!(
4304            shell.detection_difficulty(),
4305            AnomalyDetectionDifficulty::Hard
4306        );
4307
4308        let ghost = CashFraudScheme::GhostEmployee;
4309        assert_eq!(ghost.subcategory(), "payroll_schemes");
4310        assert_eq!(ghost.severity(), 5);
4311
4312        // Test all variants exist
4313        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4314    }
4315
4316    #[test]
4317    fn test_asset_fraud_scheme() {
4318        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4319        assert_eq!(
4320            ip_theft.category(),
4321            AcfeFraudCategory::AssetMisappropriation
4322        );
4323        assert_eq!(ip_theft.subcategory(), "other_assets");
4324        assert_eq!(ip_theft.severity(), 5);
4325
4326        let inv_theft = AssetFraudScheme::InventoryTheft;
4327        assert_eq!(inv_theft.subcategory(), "inventory");
4328        assert_eq!(inv_theft.severity(), 4);
4329    }
4330
4331    #[test]
4332    fn test_corruption_scheme() {
4333        let kickback = CorruptionScheme::InvoiceKickback;
4334        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4335        assert_eq!(kickback.subcategory(), "bribery");
4336        assert_eq!(kickback.severity(), 5);
4337        assert_eq!(
4338            kickback.detection_difficulty(),
4339            AnomalyDetectionDifficulty::Expert
4340        );
4341
4342        let bid_rigging = CorruptionScheme::BidRigging;
4343        assert_eq!(bid_rigging.subcategory(), "bribery");
4344        assert_eq!(
4345            bid_rigging.detection_difficulty(),
4346            AnomalyDetectionDifficulty::Hard
4347        );
4348
4349        let purchasing = CorruptionScheme::PurchasingConflict;
4350        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4351
4352        // Test all variants exist
4353        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4354    }
4355
4356    #[test]
4357    fn test_financial_statement_scheme() {
4358        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4359        assert_eq!(
4360            fictitious.category(),
4361            AcfeFraudCategory::FinancialStatementFraud
4362        );
4363        assert_eq!(fictitious.subcategory(), "overstatement");
4364        assert_eq!(fictitious.severity(), 5);
4365        assert_eq!(
4366            fictitious.detection_difficulty(),
4367            AnomalyDetectionDifficulty::Expert
4368        );
4369
4370        let understated = FinancialStatementScheme::UnderstatedRevenues;
4371        assert_eq!(understated.subcategory(), "understatement");
4372
4373        // Test all variants exist
4374        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4375    }
4376
4377    #[test]
4378    fn test_acfe_scheme_unified() {
4379        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4380        assert_eq!(
4381            cash_scheme.category(),
4382            AcfeFraudCategory::AssetMisappropriation
4383        );
4384        assert_eq!(cash_scheme.severity(), 5);
4385
4386        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4387        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4388
4389        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4390        assert_eq!(
4391            fs_scheme.category(),
4392            AcfeFraudCategory::FinancialStatementFraud
4393        );
4394    }
4395
4396    #[test]
4397    fn test_acfe_detection_method() {
4398        let tip = AcfeDetectionMethod::Tip;
4399        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4400
4401        let internal_audit = AcfeDetectionMethod::InternalAudit;
4402        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4403
4404        let external_audit = AcfeDetectionMethod::ExternalAudit;
4405        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4406
4407        // Test all variants exist
4408        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4409    }
4410
4411    #[test]
4412    fn test_perpetrator_department() {
4413        let accounting = PerpetratorDepartment::Accounting;
4414        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4415        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4416
4417        let executive = PerpetratorDepartment::Executive;
4418        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4419    }
4420
4421    #[test]
4422    fn test_perpetrator_level() {
4423        let employee = PerpetratorLevel::Employee;
4424        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4425        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4426
4427        let exec = PerpetratorLevel::OwnerExecutive;
4428        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4429    }
4430
4431    #[test]
4432    fn test_acfe_calibration() {
4433        let cal = AcfeCalibration::default();
4434        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4435        assert_eq!(cal.median_duration_months, 12);
4436        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4437        assert!(cal.validate().is_ok());
4438
4439        // Test custom calibration
4440        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4441        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4442        assert_eq!(custom_cal.median_duration_months, 18);
4443
4444        // Test validation failure
4445        let bad_cal = AcfeCalibration {
4446            collusion_rate: 1.5,
4447            ..Default::default()
4448        };
4449        assert!(bad_cal.validate().is_err());
4450    }
4451
4452    #[test]
4453    fn test_fraud_triangle() {
4454        let triangle = FraudTriangle::new(
4455            PressureType::FinancialTargets,
4456            vec![
4457                OpportunityFactor::WeakInternalControls,
4458                OpportunityFactor::ManagementOverride,
4459            ],
4460            Rationalization::ForTheCompanyGood,
4461        );
4462
4463        // Risk score should be between 0 and 1
4464        let risk = triangle.risk_score();
4465        assert!((0.0..=1.0).contains(&risk));
4466        // Should be relatively high given the components
4467        assert!(risk > 0.5);
4468    }
4469
4470    #[test]
4471    fn test_pressure_types() {
4472        let financial = PressureType::FinancialTargets;
4473        assert!(financial.risk_weight() > 0.5);
4474
4475        let gambling = PressureType::GamblingAddiction;
4476        assert_eq!(gambling.risk_weight(), 0.90);
4477    }
4478
4479    #[test]
4480    fn test_opportunity_factors() {
4481        let override_factor = OpportunityFactor::ManagementOverride;
4482        assert_eq!(override_factor.risk_weight(), 0.90);
4483
4484        let weak_controls = OpportunityFactor::WeakInternalControls;
4485        assert!(weak_controls.risk_weight() > 0.8);
4486    }
4487
4488    #[test]
4489    fn test_rationalizations() {
4490        let entitlement = Rationalization::Entitlement;
4491        assert!(entitlement.risk_weight() > 0.8);
4492
4493        let borrowing = Rationalization::TemporaryBorrowing;
4494        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4495    }
4496
4497    #[test]
4498    fn test_acfe_scheme_serialization() {
4499        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4500        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4501        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4502        assert_eq!(scheme, deserialized);
4503
4504        let calibration = AcfeCalibration::default();
4505        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4506        let deserialized: AcfeCalibration =
4507            serde_json::from_str(&json).expect("Failed to deserialize");
4508        assert_eq!(calibration.median_loss, deserialized.median_loss);
4509    }
4510}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs