datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// Causal reason explaining why an anomaly was injected.
///
/// This enables provenance tracking for understanding the "why" behind each anomaly.
/// Every variant names one injection trigger and carries the parameters that
/// identify that trigger.
///
/// Only `PartialEq` is derived (no `Eq`/`Hash`) because `RandomRate` holds an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnomalyCausalReason {
    /// Injected due to random rate selection.
    RandomRate {
        /// Base rate used for selection.
        // NOTE(review): presumably a probability in [0.0, 1.0] — confirm against the injector.
        base_rate: f64,
    },
    /// Injected due to temporal pattern matching.
    TemporalPattern {
        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
        pattern_name: String,
    },
    /// Injected based on entity targeting rules.
    EntityTargeting {
        /// Type of entity targeted (e.g., "vendor", "user", "account").
        target_type: String,
        /// ID of the targeted entity.
        target_id: String,
    },
    /// Part of an anomaly cluster.
    ClusterMembership {
        /// ID of the cluster this anomaly belongs to.
        cluster_id: String,
    },
    /// Part of a multi-step scenario.
    ScenarioStep {
        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
        scenario_type: String,
        /// Step number within the scenario.
        // NOTE(review): whether numbering starts at 0 or 1 is not visible here — confirm.
        step_number: u32,
    },
    /// Injected based on data quality profile.
    DataQualityProfile {
        /// Profile name (e.g., "noisy", "legacy", "clean").
        profile: String,
    },
    /// Injected for ML training balance.
    MLTrainingBalance {
        /// Target class being balanced.
        target_class: String,
    },
}
60
/// Structured injection strategy with captured parameters.
///
/// Unlike the string-based `injection_strategy` field, this enum captures
/// the exact parameters used during injection for full reproducibility.
///
/// Use [`InjectionStrategy::description`] for a human-readable summary and
/// [`InjectionStrategy::strategy_type`] for the bare variant name.
///
/// Only `PartialEq` is derived (no `Eq`/`Hash`) because `AmountManipulation`
/// holds an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InjectionStrategy {
    /// Amount was manipulated by a factor.
    AmountManipulation {
        /// Original amount before manipulation.
        original: Decimal,
        /// Multiplication factor applied.
        factor: f64,
    },
    /// Amount adjusted to avoid a threshold.
    ThresholdAvoidance {
        /// Threshold being avoided.
        threshold: Decimal,
        /// Final amount after adjustment.
        adjusted_amount: Decimal,
    },
    /// Date was backdated or forward-dated.
    DateShift {
        /// Number of days shifted (negative = backdated).
        days_shifted: i32,
        /// Original date before shift.
        original_date: NaiveDate,
    },
    /// User approved their own transaction.
    SelfApproval {
        /// User who created and approved.
        user_id: String,
    },
    /// Segregation of duties violation.
    SoDViolation {
        /// First duty involved.
        duty1: String,
        /// Second duty involved.
        duty2: String,
        /// User who performed both duties.
        violating_user: String,
    },
    /// Exact duplicate of another document.
    ExactDuplicate {
        /// ID of the original document.
        original_doc_id: String,
    },
    /// Near-duplicate with small variations.
    NearDuplicate {
        /// ID of the original document.
        original_doc_id: String,
        /// Fields that were varied.
        varied_fields: Vec<String>,
    },
    /// Circular flow of funds/goods.
    CircularFlow {
        /// Chain of entities involved.
        // NOTE(review): presumably listed in flow order — confirm against the generator.
        entity_chain: Vec<String>,
    },
    /// Split transaction to avoid threshold.
    SplitTransaction {
        /// Original total amount.
        original_amount: Decimal,
        /// Number of splits.
        split_count: u32,
        /// IDs of the split documents.
        // NOTE(review): nothing here enforces `split_doc_ids.len() == split_count`.
        split_doc_ids: Vec<String>,
    },
    /// Round number manipulation.
    RoundNumbering {
        /// Original precise amount.
        original_amount: Decimal,
        /// Rounded amount.
        rounded_amount: Decimal,
    },
    /// Timing manipulation (weekend, after-hours, etc.).
    TimingManipulation {
        /// Type of timing issue.
        timing_type: String,
        /// Original timestamp.
        original_time: Option<NaiveDateTime>,
    },
    /// Account misclassification.
    AccountMisclassification {
        /// Correct account.
        correct_account: String,
        /// Incorrect account used.
        incorrect_account: String,
    },
    /// Missing required field.
    MissingField {
        /// Name of the missing field.
        field_name: String,
    },
    /// Custom injection strategy.
    Custom {
        /// Strategy name.
        name: String,
        /// Additional parameters.
        parameters: HashMap<String, String>,
    },
}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
/// Primary anomaly classification.
///
/// Each variant selects a top-level category and wraps the specific sub-type
/// within that category. `Custom` carries a free-form name instead of a
/// predefined sub-type.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyType {
    /// Fraudulent activity.
    Fraud(FraudType),
    /// Data entry or processing error.
    Error(ErrorType),
    /// Process or control issue.
    ProcessIssue(ProcessIssueType),
    /// Statistical anomaly.
    Statistical(StatisticalAnomalyType),
    /// Relational/graph anomaly.
    Relational(RelationalAnomalyType),
    /// Custom anomaly type.
    Custom(String),
}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
/// Fraud types for detection training.
///
/// Variants are grouped by area (journal entries, approvals, vendor/payment,
/// assets, financial statements, accounting-standards violations, sourcing,
/// HR/payroll, order-to-cash). The enum is a plain `Copy` tag with no payload.
///
/// Several variants are documented as aliases of one another (for example
/// `FictitiousTransaction` / `FictitiousEntry` and `KickbackScheme` /
/// `Kickback`). They are nevertheless separate variants and therefore compare
/// unequal to each other.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FraudType {
    // Journal Entry Fraud
    /// Fictitious journal entry with no business purpose.
    FictitiousEntry,
    /// Fictitious transaction (alias for FictitiousEntry).
    FictitiousTransaction,
    /// Round-dollar amounts suggesting manual manipulation.
    RoundDollarManipulation,
    /// Entry posted just below approval threshold.
    JustBelowThreshold,
    /// Revenue recognition manipulation.
    RevenueManipulation,
    /// Expense capitalization fraud.
    ImproperCapitalization,
    /// Improperly capitalizing expenses as assets.
    ExpenseCapitalization,
    /// Cookie jar reserves manipulation.
    ReserveManipulation,
    /// Round-tripping funds through suspense/clearing accounts.
    SuspenseAccountAbuse,
    /// Splitting transactions to stay below approval thresholds.
    SplitTransaction,
    /// Unusual timing (weekend, holiday, after-hours postings).
    TimingAnomaly,
    /// Posting to unauthorized accounts.
    UnauthorizedAccess,

    // Approval Fraud
    /// User approving their own request.
    SelfApproval,
    /// Approval beyond authorized limit.
    ExceededApprovalLimit,
    /// Segregation of duties violation.
    SegregationOfDutiesViolation,
    /// Approval by unauthorized user.
    UnauthorizedApproval,
    /// Collusion between approver and requester.
    CollusiveApproval,

    // Vendor/Payment Fraud
    /// Fictitious vendor.
    FictitiousVendor,
    /// Duplicate payment to vendor.
    DuplicatePayment,
    /// Payment to shell company.
    ShellCompanyPayment,
    /// Kickback scheme.
    Kickback,
    /// Kickback scheme (alias).
    KickbackScheme,
    /// Unauthorized customer/vendor discount (sweethearting, side deals).
    UnauthorizedDiscount,
    /// Round-tripping funds through multiple entities or accounts to
    /// inflate apparent activity or obscure origin.
    RoundTripping,
    /// Invoice manipulation.
    InvoiceManipulation,

    // Asset Fraud
    /// Misappropriation of assets.
    AssetMisappropriation,
    /// Inventory theft.
    InventoryTheft,
    /// Ghost employee.
    GhostEmployee,

    // Financial Statement Fraud
    /// Premature revenue recognition.
    PrematureRevenue,
    /// Understated liabilities.
    UnderstatedLiabilities,
    /// Overstated assets.
    OverstatedAssets,
    /// Channel stuffing.
    ChannelStuffing,

    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
    /// Improper revenue recognition timing (ASC 606/IFRS 15).
    ImproperRevenueRecognition,
    /// Multiple performance obligations not properly separated.
    ImproperPoAllocation,
    /// Variable consideration not properly estimated.
    VariableConsiderationManipulation,
    /// Contract modifications not properly accounted for.
    ContractModificationMisstatement,

    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
    /// Lease classification manipulation (operating vs finance).
    LeaseClassificationManipulation,
    /// Off-balance sheet lease fraud.
    OffBalanceSheetLease,
    /// Lease liability understatement.
    LeaseLiabilityUnderstatement,
    /// ROU (right-of-use) asset misstatement.
    RouAssetMisstatement,

    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
    /// Fair value hierarchy misclassification.
    FairValueHierarchyManipulation,
    /// Level 3 input manipulation.
    Level3InputManipulation,
    /// Valuation technique manipulation.
    ValuationTechniqueManipulation,

    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
    /// Delayed impairment recognition.
    DelayedImpairment,
    /// Improperly avoiding impairment testing.
    ImpairmentTestAvoidance,
    /// Cash flow projection manipulation for impairment.
    CashFlowProjectionManipulation,
    /// Improper impairment reversal (IFRS only).
    ImproperImpairmentReversal,

    // Sourcing/Procurement Fraud (S2C)
    /// Bid rigging or collusion among bidders.
    BidRigging,
    /// Contracts with phantom/shell vendors.
    PhantomVendorContract,
    /// Splitting contracts to avoid approval thresholds.
    SplitContractThreshold,
    /// Conflict of interest in sourcing decisions.
    ConflictOfInterestSourcing,

    // HR/Payroll Fraud (H2R)
    /// Ghost employee on payroll.
    GhostEmployeePayroll,
    /// Payroll inflation/unauthorized raises.
    PayrollInflation,
    /// Duplicate expense report submission.
    DuplicateExpenseReport,
    /// Fictitious expense claims.
    FictitiousExpense,
    /// Splitting expenses to avoid approval threshold.
    SplitExpenseToAvoidApproval,

    // O2C Fraud
    /// Revenue timing manipulation via quotes.
    RevenueTimingManipulation,
    /// Overriding quote prices without authorization.
    QuotePriceOverride,
}
443
444impl FraudType {
445    /// Returns severity level (1-5).
446    pub fn severity(&self) -> u8 {
447        match self {
448            FraudType::RoundDollarManipulation => 2,
449            FraudType::JustBelowThreshold => 3,
450            FraudType::SelfApproval => 3,
451            FraudType::ExceededApprovalLimit => 3,
452            FraudType::DuplicatePayment => 3,
453            FraudType::FictitiousEntry => 4,
454            FraudType::RevenueManipulation => 5,
455            FraudType::FictitiousVendor => 5,
456            FraudType::ShellCompanyPayment => 5,
457            FraudType::AssetMisappropriation => 5,
458            FraudType::SegregationOfDutiesViolation => 4,
459            FraudType::CollusiveApproval => 5,
460            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
461            FraudType::ImproperRevenueRecognition => 5,
462            FraudType::ImproperPoAllocation => 4,
463            FraudType::VariableConsiderationManipulation => 4,
464            FraudType::ContractModificationMisstatement => 3,
465            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
466            FraudType::LeaseClassificationManipulation => 4,
467            FraudType::OffBalanceSheetLease => 5,
468            FraudType::LeaseLiabilityUnderstatement => 4,
469            FraudType::RouAssetMisstatement => 3,
470            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
471            FraudType::FairValueHierarchyManipulation => 4,
472            FraudType::Level3InputManipulation => 5,
473            FraudType::ValuationTechniqueManipulation => 4,
474            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
475            FraudType::DelayedImpairment => 4,
476            FraudType::ImpairmentTestAvoidance => 4,
477            FraudType::CashFlowProjectionManipulation => 5,
478            FraudType::ImproperImpairmentReversal => 3,
479            _ => 4,
480        }
481    }
482}
483
/// Error types for error detection.
///
/// Variants are grouped into data-entry, timing, classification, calculation
/// and (non-fraudulent) accounting-standards errors. The enum is a plain
/// `Copy` tag with no payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorType {
    // Data Entry Errors
    /// Duplicate document entry.
    DuplicateEntry,
    /// Reversed debit/credit amounts.
    ReversedAmount,
    /// Transposed digits in amount.
    TransposedDigits,
    /// Wrong decimal placement.
    DecimalError,
    /// Missing required field.
    MissingField,
    /// Invalid account code.
    InvalidAccount,

    // Timing Errors
    /// Posted to wrong period.
    WrongPeriod,
    /// Backdated entry.
    BackdatedEntry,
    /// Future-dated entry.
    FutureDatedEntry,
    /// Cutoff error.
    CutoffError,

    // Classification Errors
    /// Wrong account classification.
    MisclassifiedAccount,
    /// Wrong cost center.
    WrongCostCenter,
    /// Wrong company code.
    WrongCompanyCode,

    // Calculation Errors
    /// Unbalanced journal entry.
    UnbalancedEntry,
    /// Rounding error.
    RoundingError,
    /// Currency conversion error.
    CurrencyError,
    /// Tax calculation error.
    TaxCalculationError,

    // Accounting Standards Errors (Non-Fraudulent)
    /// Wrong revenue recognition timing (honest mistake).
    RevenueTimingError,
    /// Performance obligation allocation error.
    PoAllocationError,
    /// Lease classification error (operating vs finance).
    LeaseClassificationError,
    /// Lease calculation error (PV, amortization).
    LeaseCalculationError,
    /// Fair value measurement error.
    FairValueError,
    /// Impairment calculation error.
    ImpairmentCalculationError,
    /// Discount rate error.
    DiscountRateError,
    /// Framework application error (IFRS vs GAAP).
    FrameworkApplicationError,
}
547
548impl ErrorType {
549    /// Returns severity level (1-5).
550    pub fn severity(&self) -> u8 {
551        match self {
552            ErrorType::RoundingError => 1,
553            ErrorType::MissingField => 2,
554            ErrorType::TransposedDigits => 2,
555            ErrorType::DecimalError => 3,
556            ErrorType::DuplicateEntry => 3,
557            ErrorType::ReversedAmount => 3,
558            ErrorType::WrongPeriod => 4,
559            ErrorType::UnbalancedEntry => 5,
560            ErrorType::CurrencyError => 4,
561            // Accounting Standards Errors
562            ErrorType::RevenueTimingError => 4,
563            ErrorType::PoAllocationError => 3,
564            ErrorType::LeaseClassificationError => 3,
565            ErrorType::LeaseCalculationError => 3,
566            ErrorType::FairValueError => 4,
567            ErrorType::ImpairmentCalculationError => 4,
568            ErrorType::DiscountRateError => 3,
569            ErrorType::FrameworkApplicationError => 4,
570            _ => 3,
571        }
572    }
573}
574
/// Process issue types.
///
/// Variants are grouped into approval, timing, control, documentation,
/// sourcing/procurement (S2C) and order-to-cash (O2C) issues. The enum is a
/// plain `Copy` tag with no payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProcessIssueType {
    // Approval Issues
    /// Approval skipped entirely.
    SkippedApproval,
    /// Late approval (after posting).
    LateApproval,
    /// Missing supporting documentation.
    MissingDocumentation,
    /// Incomplete approval chain.
    IncompleteApprovalChain,

    // Timing Issues
    /// Late posting.
    LatePosting,
    /// Posting outside business hours.
    AfterHoursPosting,
    /// Weekend/holiday posting.
    WeekendPosting,
    /// Rushed period-end posting.
    RushedPeriodEnd,
    /// Entry posted after the period-end close date (ISA 240.32).
    /// Distinct from `RushedPeriodEnd` which flags pre-close volume spikes —
    /// this variant specifically marks post-close adjustments.
    PostClosePosting,

    // Control Issues
    /// Manual override of system control.
    ManualOverride,
    /// Unusual user access pattern.
    UnusualAccess,
    /// System bypass.
    SystemBypass,
    /// Batch processing anomaly.
    BatchAnomaly,

    // Documentation Issues
    /// Vague or missing description.
    VagueDescription,
    /// Changed after posting.
    PostFactoChange,
    /// Incomplete audit trail.
    IncompleteAuditTrail,

    // Sourcing/Procurement Issues (S2C)
    /// Purchasing outside of contracts (maverick spend).
    MaverickSpend,
    /// Purchasing against an expired contract.
    ExpiredContractPurchase,
    /// Overriding contracted price without authorization.
    ContractPriceOverride,
    /// Award given with only a single bid received.
    SingleBidAward,
    /// Bypassing supplier qualification requirements.
    QualificationBypass,

    // O2C Issues
    /// Converting an expired quote to a sales order.
    ExpiredQuoteConversion,
}
636
637impl ProcessIssueType {
638    /// Returns severity level (1-5).
639    pub fn severity(&self) -> u8 {
640        match self {
641            ProcessIssueType::VagueDescription => 1,
642            ProcessIssueType::LatePosting => 2,
643            ProcessIssueType::AfterHoursPosting => 2,
644            ProcessIssueType::WeekendPosting => 2,
645            ProcessIssueType::PostClosePosting => 4,
646            ProcessIssueType::SkippedApproval => 4,
647            ProcessIssueType::ManualOverride => 4,
648            ProcessIssueType::SystemBypass => 5,
649            ProcessIssueType::IncompleteAuditTrail => 4,
650            _ => 3,
651        }
652    }
653}
654
/// Statistical anomaly types.
///
/// Variants are grouped into amount, frequency, trend, distribution,
/// sourcing/contract and HR/payroll anomalies. The enum is a plain `Copy`
/// tag with no payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatisticalAnomalyType {
    // Amount Anomalies
    /// Amount significantly above normal.
    UnusuallyHighAmount,
    /// Amount significantly below normal.
    UnusuallyLowAmount,
    /// Violates Benford's Law distribution.
    BenfordViolation,
    /// Exact duplicate amount (suspicious).
    ExactDuplicateAmount,
    /// Repeating pattern in amounts.
    RepeatingAmount,

    // Frequency Anomalies
    /// Unusual transaction frequency.
    UnusualFrequency,
    /// Burst of transactions.
    TransactionBurst,
    /// Unusual time of day.
    UnusualTiming,

    // Trend Anomalies
    /// Break in historical trend.
    TrendBreak,
    /// Sudden level shift.
    LevelShift,
    /// Seasonal pattern violation.
    SeasonalAnomaly,

    // Distribution Anomalies
    /// Outlier in distribution.
    StatisticalOutlier,
    /// Change in variance.
    VarianceChange,
    /// Distribution shift.
    DistributionShift,

    // Sourcing/Contract Anomalies
    /// Pattern of SLA breaches from a vendor.
    SlaBreachPattern,
    /// Contract with zero utilization.
    UnusedContract,

    // HR/Payroll Anomalies
    /// Anomalous overtime patterns.
    OvertimeAnomaly,
}
704
705impl StatisticalAnomalyType {
706    /// Returns severity level (1-5).
707    pub fn severity(&self) -> u8 {
708        match self {
709            StatisticalAnomalyType::UnusualTiming => 1,
710            StatisticalAnomalyType::UnusualFrequency => 2,
711            StatisticalAnomalyType::BenfordViolation => 2,
712            StatisticalAnomalyType::UnusuallyHighAmount => 3,
713            StatisticalAnomalyType::TrendBreak => 3,
714            StatisticalAnomalyType::TransactionBurst => 4,
715            StatisticalAnomalyType::ExactDuplicateAmount => 3,
716            _ => 3,
717        }
718    }
719}
720
/// Relational/graph anomaly types.
///
/// Variants are grouped into transaction-pattern, network, relationship and
/// intercompany anomalies. The enum is a plain `Copy` tag with no payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RelationalAnomalyType {
    // Transaction Pattern Anomalies
    /// Circular transaction pattern.
    CircularTransaction,
    /// Unusual account combination.
    UnusualAccountPair,
    /// New trading partner.
    NewCounterparty,
    /// Dormant account suddenly active.
    DormantAccountActivity,

    // Network Anomalies
    /// Unusual network centrality.
    CentralityAnomaly,
    /// Isolated transaction cluster.
    IsolatedCluster,
    /// Bridge node anomaly.
    BridgeNodeAnomaly,
    /// Community structure change.
    CommunityAnomaly,

    // Relationship Anomalies
    /// Missing expected relationship.
    MissingRelationship,
    /// Unexpected relationship.
    UnexpectedRelationship,
    /// Relationship strength change.
    RelationshipStrengthChange,

    // Intercompany Anomalies
    /// Unmatched intercompany transaction.
    UnmatchedIntercompany,
    /// Circular intercompany flow.
    CircularIntercompany,
    /// Transfer pricing anomaly.
    TransferPricingAnomaly,
}
760
761impl RelationalAnomalyType {
762    /// Returns severity level (1-5).
763    pub fn severity(&self) -> u8 {
764        match self {
765            RelationalAnomalyType::NewCounterparty => 1,
766            RelationalAnomalyType::DormantAccountActivity => 2,
767            RelationalAnomalyType::UnusualAccountPair => 2,
768            RelationalAnomalyType::CircularTransaction => 4,
769            RelationalAnomalyType::CircularIntercompany => 4,
770            RelationalAnomalyType::TransferPricingAnomaly => 4,
771            RelationalAnomalyType::UnmatchedIntercompany => 3,
772            _ => 3,
773        }
774    }
775}
776
/// A labeled anomaly for supervised learning.
///
/// Combines the classification of an anomaly with the document it affects,
/// scoring information, and optional provenance data describing how and why
/// it was generated.
///
/// The provenance fields carry `#[serde(default, skip_serializing_if = ...)]`:
/// they are left out of serialized output when empty and fall back to their
/// default when absent from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabeledAnomaly {
    /// Unique anomaly identifier.
    pub anomaly_id: String,
    /// Type of anomaly.
    pub anomaly_type: AnomalyType,
    /// Document or entity that contains the anomaly.
    pub document_id: String,
    /// Document type (JE, PO, Invoice, etc.).
    pub document_type: String,
    /// Company code.
    pub company_code: String,
    /// Date the anomaly occurred.
    pub anomaly_date: NaiveDate,
    /// Timestamp when detected/injected.
    /// (De)serialized through the `crate::serde_timestamp::naive` helper module.
    #[serde(with = "crate::serde_timestamp::naive")]
    pub detection_timestamp: NaiveDateTime,
    /// Confidence score (0.0 - 1.0) for injected anomalies.
    // NOTE(review): the range is not enforced by the type — confirm callers clamp it.
    pub confidence: f64,
    /// Severity (1-5).
    // NOTE(review): the range is not enforced by the type (`u8`).
    pub severity: u8,
    /// Description of the anomaly.
    pub description: String,
    /// Related entities (user IDs, account codes, etc.).
    pub related_entities: Vec<String>,
    /// Monetary impact if applicable.
    pub monetary_impact: Option<Decimal>,
    /// Additional metadata.
    pub metadata: HashMap<String, String>,
    /// Whether this was injected (true) or naturally occurring (false).
    pub is_injected: bool,
    /// Injection strategy used (if injected) - legacy string field.
    /// See `structured_strategy` for the parameterized form.
    pub injection_strategy: Option<String>,
    /// Cluster ID if part of an anomaly cluster.
    pub cluster_id: Option<String>,

    // ========================================
    // PROVENANCE TRACKING FIELDS (Phase 1.2)
    // ========================================
    /// Hash of the original document before modification.
    /// Enables tracking what the document looked like pre-injection.
    // NOTE(review): hash algorithm and encoding are not specified here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_document_hash: Option<String>,

    /// Causal reason explaining why this anomaly was injected.
    /// Provides "why" tracking for each anomaly.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_reason: Option<AnomalyCausalReason>,

    /// Structured injection strategy with parameters.
    /// More detailed than the legacy string-based injection_strategy field.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured_strategy: Option<InjectionStrategy>,

    /// Parent anomaly ID if this was derived from another anomaly.
    /// Enables anomaly transformation chains.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_anomaly_id: Option<String>,

    /// Child anomaly IDs that were derived from this anomaly.
    /// Omitted from serialized output when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub child_anomaly_ids: Vec<String>,

    /// Scenario ID if this anomaly is part of a multi-step scenario.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scenario_id: Option<String>,

    /// Generation run ID that produced this anomaly.
    /// Enables tracing anomalies back to their generation run.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,

    /// Seed used for RNG during generation.
    /// Enables reproducibility.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation_seed: Option<u64>,
}
855
856impl LabeledAnomaly {
857    /// Creates a new labeled anomaly.
858    pub fn new(
859        anomaly_id: String,
860        anomaly_type: AnomalyType,
861        document_id: String,
862        document_type: String,
863        company_code: String,
864        anomaly_date: NaiveDate,
865    ) -> Self {
866        let severity = anomaly_type.severity();
867        let description = format!(
868            "{} - {} in document {}",
869            anomaly_type.category(),
870            anomaly_type.type_name(),
871            document_id
872        );
873
874        Self {
875            anomaly_id,
876            anomaly_type,
877            document_id,
878            document_type,
879            company_code,
880            anomaly_date,
881            detection_timestamp: chrono::Local::now().naive_local(),
882            confidence: 1.0,
883            severity,
884            description,
885            related_entities: Vec::new(),
886            monetary_impact: None,
887            metadata: HashMap::new(),
888            is_injected: true,
889            injection_strategy: None,
890            cluster_id: None,
891            // Provenance fields
892            original_document_hash: None,
893            causal_reason: None,
894            structured_strategy: None,
895            parent_anomaly_id: None,
896            child_anomaly_ids: Vec::new(),
897            scenario_id: None,
898            run_id: None,
899            generation_seed: None,
900        }
901    }
902
903    /// Sets the description.
904    pub fn with_description(mut self, description: &str) -> Self {
905        self.description = description.to_string();
906        self
907    }
908
909    /// Sets the monetary impact.
910    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
911        self.monetary_impact = Some(impact);
912        self
913    }
914
915    /// Adds a related entity.
916    pub fn with_related_entity(mut self, entity: &str) -> Self {
917        self.related_entities.push(entity.to_string());
918        self
919    }
920
921    /// Adds metadata.
922    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
923        self.metadata.insert(key.to_string(), value.to_string());
924        self
925    }
926
927    /// Sets the injection strategy (legacy string).
928    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
929        self.injection_strategy = Some(strategy.to_string());
930        self
931    }
932
933    /// Sets the cluster ID.
934    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
935        self.cluster_id = Some(cluster_id.to_string());
936        self
937    }
938
939    // ========================================
940    // PROVENANCE BUILDER METHODS (Phase 1.2)
941    // ========================================
942
943    /// Sets the original document hash for provenance tracking.
944    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
945        self.original_document_hash = Some(hash.to_string());
946        self
947    }
948
949    /// Sets the causal reason for this anomaly.
950    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
951        self.causal_reason = Some(reason);
952        self
953    }
954
955    /// Sets the structured injection strategy.
956    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
957        // Also set the legacy string field for backward compatibility
958        self.injection_strategy = Some(strategy.strategy_type().to_string());
959        self.structured_strategy = Some(strategy);
960        self
961    }
962
963    /// Sets the parent anomaly ID (for anomaly derivation chains).
964    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
965        self.parent_anomaly_id = Some(parent_id.to_string());
966        self
967    }
968
969    /// Adds a child anomaly ID.
970    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
971        self.child_anomaly_ids.push(child_id.to_string());
972        self
973    }
974
975    /// Sets the scenario ID for multi-step scenario tracking.
976    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
977        self.scenario_id = Some(scenario_id.to_string());
978        self
979    }
980
981    /// Sets the generation run ID.
982    pub fn with_run_id(mut self, run_id: &str) -> Self {
983        self.run_id = Some(run_id.to_string());
984        self
985    }
986
987    /// Sets the generation seed for reproducibility.
988    pub fn with_generation_seed(mut self, seed: u64) -> Self {
989        self.generation_seed = Some(seed);
990        self
991    }
992
993    /// Sets multiple provenance fields at once for convenience.
994    pub fn with_provenance(
995        mut self,
996        run_id: Option<&str>,
997        seed: Option<u64>,
998        causal_reason: Option<AnomalyCausalReason>,
999    ) -> Self {
1000        if let Some(id) = run_id {
1001            self.run_id = Some(id.to_string());
1002        }
1003        self.generation_seed = seed;
1004        self.causal_reason = causal_reason;
1005        self
1006    }
1007
1008    /// Converts to a feature vector for ML.
1009    ///
1010    /// Returns a vector of 15 features:
1011    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1012    /// - 1 feature: Severity (normalized 0-1)
1013    /// - 1 feature: Confidence
1014    /// - 1 feature: Has monetary impact (0/1)
1015    /// - 1 feature: Monetary impact (log-scaled)
1016    /// - 1 feature: Is intentional (0/1)
1017    /// - 1 feature: Number of related entities
1018    /// - 1 feature: Is part of cluster (0/1)
1019    /// - 1 feature: Is part of scenario (0/1)
1020    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1021    pub fn to_features(&self) -> Vec<f64> {
1022        let mut features = Vec::new();
1023
1024        // Category one-hot encoding
1025        let categories = [
1026            "Fraud",
1027            "Error",
1028            "ProcessIssue",
1029            "Statistical",
1030            "Relational",
1031            "Custom",
1032        ];
1033        for cat in &categories {
1034            features.push(if self.anomaly_type.category() == *cat {
1035                1.0
1036            } else {
1037                0.0
1038            });
1039        }
1040
1041        // Severity (normalized)
1042        features.push(self.severity as f64 / 5.0);
1043
1044        // Confidence
1045        features.push(self.confidence);
1046
1047        // Has monetary impact
1048        features.push(if self.monetary_impact.is_some() {
1049            1.0
1050        } else {
1051            0.0
1052        });
1053
1054        // Monetary impact (log-scaled)
1055        if let Some(impact) = self.monetary_impact {
1056            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1057            features.push((impact_f64.abs() + 1.0).ln());
1058        } else {
1059            features.push(0.0);
1060        }
1061
1062        // Is intentional
1063        features.push(if self.anomaly_type.is_intentional() {
1064            1.0
1065        } else {
1066            0.0
1067        });
1068
1069        // Number of related entities
1070        features.push(self.related_entities.len() as f64);
1071
1072        // Is part of cluster
1073        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1074
1075        // Provenance features
1076        // Is part of scenario
1077        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1078
1079        // Has parent anomaly (indicates this is a derived anomaly)
1080        features.push(if self.parent_anomaly_id.is_some() {
1081            1.0
1082        } else {
1083            0.0
1084        });
1085
1086        features
1087    }
1088
1089    /// Returns the number of features in the feature vector.
1090    pub fn feature_count() -> usize {
1091        15 // 6 category + 9 other features
1092    }
1093
1094    /// Returns feature names for documentation/ML metadata.
1095    pub fn feature_names() -> Vec<&'static str> {
1096        vec![
1097            "category_fraud",
1098            "category_error",
1099            "category_process_issue",
1100            "category_statistical",
1101            "category_relational",
1102            "category_custom",
1103            "severity_normalized",
1104            "confidence",
1105            "has_monetary_impact",
1106            "monetary_impact_log",
1107            "is_intentional",
1108            "related_entity_count",
1109            "is_clustered",
1110            "is_scenario_part",
1111            "is_derived",
1112        ]
1113    }
1114}
1115
/// Summary of anomalies for reporting.
///
/// Holds aggregate counts, the summed monetary impact, the covered date range and
/// the number of distinct clusters for a set of `LabeledAnomaly` records. It is
/// normally built with `AnomalySummary::from_anomalies`, which starts from the
/// derived `Default` value and fills the fields as described below.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Total anomaly count.
    pub total_count: usize,
    /// Count by category, keyed by the string returned from `AnomalyType::category()`.
    pub by_category: HashMap<String, usize>,
    /// Count by specific type, keyed by `AnomalyType::type_name()`.
    pub by_type: HashMap<String, usize>,
    /// Count by severity, keyed by the anomaly's raw `severity` value.
    pub by_severity: HashMap<u8, usize>,
    /// Count by company, keyed by `company_code`.
    pub by_company: HashMap<String, usize>,
    /// Total monetary impact: the signed sum over the anomalies that carry one.
    pub total_monetary_impact: Decimal,
    /// Date range as `(earliest, latest)` anomaly date; `None` when there were no anomalies.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster IDs among the anomalies.
    pub cluster_count: usize,
}
1136
1137impl AnomalySummary {
1138    /// Creates a summary from a list of anomalies.
1139    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1140        let mut summary = AnomalySummary {
1141            total_count: anomalies.len(),
1142            ..Default::default()
1143        };
1144
1145        let mut min_date: Option<NaiveDate> = None;
1146        let mut max_date: Option<NaiveDate> = None;
1147        let mut clusters = std::collections::HashSet::new();
1148
1149        for anomaly in anomalies {
1150            // By category
1151            *summary
1152                .by_category
1153                .entry(anomaly.anomaly_type.category().to_string())
1154                .or_insert(0) += 1;
1155
1156            // By type
1157            *summary
1158                .by_type
1159                .entry(anomaly.anomaly_type.type_name())
1160                .or_insert(0) += 1;
1161
1162            // By severity
1163            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1164
1165            // By company
1166            *summary
1167                .by_company
1168                .entry(anomaly.company_code.clone())
1169                .or_insert(0) += 1;
1170
1171            // Monetary impact
1172            if let Some(impact) = anomaly.monetary_impact {
1173                summary.total_monetary_impact += impact;
1174            }
1175
1176            // Date range
1177            match min_date {
1178                None => min_date = Some(anomaly.anomaly_date),
1179                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1180                _ => {}
1181            }
1182            match max_date {
1183                None => max_date = Some(anomaly.anomaly_date),
1184                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1185                _ => {}
1186            }
1187
1188            // Clusters
1189            if let Some(cluster_id) = &anomaly.cluster_id {
1190                clusters.insert(cluster_id.clone());
1191            }
1192        }
1193
1194        summary.date_range = min_date.zip(max_date);
1195        summary.cluster_count = clusters.len();
1196
1197        summary
1198    }
1199}
1200
1201// ============================================================================
1202// ENHANCED ANOMALY TAXONOMY (FR-003)
1203// ============================================================================
1204
/// High-level anomaly category for multi-class classification.
///
/// These categories provide a more granular classification than the base
/// AnomalyType enum, enabling better ML model training and audit reporting.
///
/// The type derives `Eq` and `Hash`, so it can be used as a map or set key.
/// It is not `Copy` because `Custom` owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related anomalies
    /// Fictitious or shell vendor.
    FictitiousVendor,
    /// Kickback or collusion with vendor.
    VendorKickback,
    /// Related party vendor transactions.
    RelatedPartyVendor,

    // Transaction-related anomalies
    /// Duplicate payment or invoice.
    DuplicatePayment,
    /// Unauthorized transaction.
    UnauthorizedTransaction,
    /// Structured transactions to avoid thresholds.
    StructuredTransaction,

    // Pattern-based anomalies
    /// Circular flow of funds.
    CircularFlow,
    /// Behavioral anomaly (deviation from normal patterns).
    BehavioralAnomaly,
    /// Timing-based anomaly.
    TimingAnomaly,

    // Journal entry anomalies
    /// Manual journal entry anomaly.
    JournalAnomaly,
    /// Manual override of controls.
    ManualOverride,
    /// Missing approval in chain.
    MissingApproval,

    // Statistical anomalies
    /// Statistical outlier.
    StatisticalOutlier,
    /// Distribution anomaly (Benford, etc.).
    DistributionAnomaly,

    // Custom category
    /// User-defined category; the payload is the user-supplied category name.
    Custom(String),
}
1253
1254impl AnomalyCategory {
1255    /// Derives an AnomalyCategory from an AnomalyType.
1256    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1257        match anomaly_type {
1258            AnomalyType::Fraud(fraud_type) => match fraud_type {
1259                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1260                    AnomalyCategory::FictitiousVendor
1261                }
1262                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1263                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1264                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1265                    AnomalyCategory::StructuredTransaction
1266                }
1267                FraudType::SelfApproval
1268                | FraudType::UnauthorizedApproval
1269                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1270                FraudType::TimingAnomaly
1271                | FraudType::RoundDollarManipulation
1272                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1273                _ => AnomalyCategory::BehavioralAnomaly,
1274            },
1275            AnomalyType::Error(error_type) => match error_type {
1276                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1277                ErrorType::WrongPeriod
1278                | ErrorType::BackdatedEntry
1279                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1280                _ => AnomalyCategory::JournalAnomaly,
1281            },
1282            AnomalyType::ProcessIssue(process_type) => match process_type {
1283                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1284                    AnomalyCategory::MissingApproval
1285                }
1286                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1287                    AnomalyCategory::ManualOverride
1288                }
1289                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1290                    AnomalyCategory::TimingAnomaly
1291                }
1292                _ => AnomalyCategory::BehavioralAnomaly,
1293            },
1294            AnomalyType::Statistical(stat_type) => match stat_type {
1295                StatisticalAnomalyType::BenfordViolation
1296                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1297                _ => AnomalyCategory::StatisticalOutlier,
1298            },
1299            AnomalyType::Relational(rel_type) => match rel_type {
1300                RelationalAnomalyType::CircularTransaction
1301                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1302                _ => AnomalyCategory::BehavioralAnomaly,
1303            },
1304            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1305        }
1306    }
1307
1308    /// Returns the category name as a string.
1309    pub fn name(&self) -> &str {
1310        match self {
1311            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1312            AnomalyCategory::VendorKickback => "vendor_kickback",
1313            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1314            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1315            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1316            AnomalyCategory::StructuredTransaction => "structured_transaction",
1317            AnomalyCategory::CircularFlow => "circular_flow",
1318            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1319            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1320            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1321            AnomalyCategory::ManualOverride => "manual_override",
1322            AnomalyCategory::MissingApproval => "missing_approval",
1323            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1324            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1325            AnomalyCategory::Custom(s) => s.as_str(),
1326        }
1327    }
1328
1329    /// Returns the ordinal value for ML encoding.
1330    pub fn ordinal(&self) -> u8 {
1331        match self {
1332            AnomalyCategory::FictitiousVendor => 0,
1333            AnomalyCategory::VendorKickback => 1,
1334            AnomalyCategory::RelatedPartyVendor => 2,
1335            AnomalyCategory::DuplicatePayment => 3,
1336            AnomalyCategory::UnauthorizedTransaction => 4,
1337            AnomalyCategory::StructuredTransaction => 5,
1338            AnomalyCategory::CircularFlow => 6,
1339            AnomalyCategory::BehavioralAnomaly => 7,
1340            AnomalyCategory::TimingAnomaly => 8,
1341            AnomalyCategory::JournalAnomaly => 9,
1342            AnomalyCategory::ManualOverride => 10,
1343            AnomalyCategory::MissingApproval => 11,
1344            AnomalyCategory::StatisticalOutlier => 12,
1345            AnomalyCategory::DistributionAnomaly => 13,
1346            AnomalyCategory::Custom(_) => 14,
1347        }
1348    }
1349
1350    /// Returns the total number of categories (excluding Custom).
1351    pub fn category_count() -> usize {
1352        15 // 14 fixed categories + Custom
1353    }
1354}
1355
/// Type of contributing factor for anomaly confidence/severity calculation.
///
/// A plain, field-less enum: it derives `Copy`, `Eq` and `Hash`, so values can
/// be compared directly and used as map keys. `FactorType::name` gives the
/// snake_case identifier of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Amount deviation from expected value.
    AmountDeviation,
    /// Proximity to approval/reporting threshold.
    ThresholdProximity,
    /// Timing-related anomaly indicator.
    TimingAnomaly,
    /// Entity risk score contribution.
    EntityRisk,
    /// Pattern match confidence.
    PatternMatch,
    /// Frequency deviation from normal.
    FrequencyDeviation,
    /// Relationship-based anomaly indicator.
    RelationshipAnomaly,
    /// Control bypass indicator.
    ControlBypass,
    /// Benford's Law violation.
    BenfordViolation,
    /// Duplicate indicator.
    DuplicateIndicator,
    /// Approval chain issue.
    ApprovalChainIssue,
    /// Documentation gap.
    DocumentationGap,
    /// Custom factor type (carries no payload, so custom factors are not distinguished from each other).
    Custom,
}
1386
1387impl FactorType {
1388    /// Returns the factor type name.
1389    pub fn name(&self) -> &'static str {
1390        match self {
1391            FactorType::AmountDeviation => "amount_deviation",
1392            FactorType::ThresholdProximity => "threshold_proximity",
1393            FactorType::TimingAnomaly => "timing_anomaly",
1394            FactorType::EntityRisk => "entity_risk",
1395            FactorType::PatternMatch => "pattern_match",
1396            FactorType::FrequencyDeviation => "frequency_deviation",
1397            FactorType::RelationshipAnomaly => "relationship_anomaly",
1398            FactorType::ControlBypass => "control_bypass",
1399            FactorType::BenfordViolation => "benford_violation",
1400            FactorType::DuplicateIndicator => "duplicate_indicator",
1401            FactorType::ApprovalChainIssue => "approval_chain_issue",
1402            FactorType::DocumentationGap => "documentation_gap",
1403            FactorType::Custom => "custom",
1404        }
1405    }
1406}
1407
/// Evidence supporting a contributing factor.
///
/// A source label plus free-form string key/value pairs. Attached to a
/// `ContributingFactor` through `ContributingFactor::with_evidence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
    pub source: String,
    /// Raw evidence data as string key/value pairs.
    pub data: HashMap<String, String>,
}
1416
/// A contributing factor to anomaly confidence/severity.
///
/// Describes one observed value relative to a threshold, the direction in which
/// crossing the threshold counts as anomalous, and a weight.
/// `ContributingFactor::contribution` turns these into a score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Type of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold or expected value.
    pub threshold: f64,
    /// Direction of comparison (true = value > threshold is anomalous,
    /// false = value < threshold is anomalous).
    pub direction_greater: bool,
    /// Weight of this factor in overall calculation (0.0 - 1.0).
    /// The range is a convention; the constructor stores the value as given.
    pub weight: f64,
    /// Human-readable description.
    pub description: String,
    /// Optional supporting evidence.
    pub evidence: Option<FactorEvidence>,
}
1435
1436impl ContributingFactor {
1437    /// Creates a new contributing factor.
1438    pub fn new(
1439        factor_type: FactorType,
1440        value: f64,
1441        threshold: f64,
1442        direction_greater: bool,
1443        weight: f64,
1444        description: &str,
1445    ) -> Self {
1446        Self {
1447            factor_type,
1448            value,
1449            threshold,
1450            direction_greater,
1451            weight,
1452            description: description.to_string(),
1453            evidence: None,
1454        }
1455    }
1456
1457    /// Adds evidence to the factor.
1458    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1459        self.evidence = Some(FactorEvidence {
1460            source: source.to_string(),
1461            data,
1462        });
1463        self
1464    }
1465
1466    /// Calculates the factor's contribution to anomaly score.
1467    pub fn contribution(&self) -> f64 {
1468        let deviation = if self.direction_greater {
1469            (self.value - self.threshold).max(0.0)
1470        } else {
1471            (self.threshold - self.value).max(0.0)
1472        };
1473
1474        // Normalize by threshold to get relative deviation
1475        let relative_deviation = if self.threshold.abs() > 0.001 {
1476            deviation / self.threshold.abs()
1477        } else {
1478            deviation
1479        };
1480
1481        // Apply weight and cap at 1.0
1482        (relative_deviation * self.weight).min(1.0)
1483    }
1484}
1485
/// Enhanced anomaly label with dynamic confidence and severity.
///
/// Wraps a `LabeledAnomaly` and adds a finer-grained category, continuous
/// confidence/severity values and the factors that explain them. Start from
/// `EnhancedAnomalyLabel::from_base` and refine with the `with_*` builders.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// Base labeled anomaly (backward compatible).
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Dynamically calculated confidence (0.0 - 1.0).
    /// `with_confidence` clamps to this range; `from_base` copies the base confidence as is.
    pub enhanced_confidence: f64,
    /// Contextually calculated severity (0.0 - 1.0).
    /// `with_severity` clamps to this range; `from_base` uses the base severity divided by 5.
    pub enhanced_severity: f64,
    /// Factors contributing to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Secondary categories (for multi-label classification).
    /// `with_secondary_category` keeps this free of duplicates and of the primary category.
    pub secondary_categories: Vec<AnomalyCategory>,
}
1502
1503impl EnhancedAnomalyLabel {
1504    /// Creates an enhanced label from a base labeled anomaly.
1505    pub fn from_base(base: LabeledAnomaly) -> Self {
1506        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1507        let enhanced_confidence = base.confidence;
1508        let enhanced_severity = base.severity as f64 / 5.0;
1509
1510        Self {
1511            base,
1512            category,
1513            enhanced_confidence,
1514            enhanced_severity,
1515            contributing_factors: Vec::new(),
1516            secondary_categories: Vec::new(),
1517        }
1518    }
1519
1520    /// Sets the enhanced confidence.
1521    pub fn with_confidence(mut self, confidence: f64) -> Self {
1522        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1523        self
1524    }
1525
1526    /// Sets the enhanced severity.
1527    pub fn with_severity(mut self, severity: f64) -> Self {
1528        self.enhanced_severity = severity.clamp(0.0, 1.0);
1529        self
1530    }
1531
1532    /// Adds a contributing factor.
1533    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1534        self.contributing_factors.push(factor);
1535        self
1536    }
1537
1538    /// Adds a secondary category.
1539    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1540        if !self.secondary_categories.contains(&category) && category != self.category {
1541            self.secondary_categories.push(category);
1542        }
1543        self
1544    }
1545
1546    /// Converts to an extended feature vector.
1547    ///
1548    /// Returns base features (15) + enhanced features (10) = 25 features.
1549    pub fn to_features(&self) -> Vec<f64> {
1550        let mut features = self.base.to_features();
1551
1552        // Enhanced features
1553        features.push(self.enhanced_confidence);
1554        features.push(self.enhanced_severity);
1555        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1556        features.push(self.secondary_categories.len() as f64);
1557        features.push(self.contributing_factors.len() as f64);
1558
1559        // Max factor weight
1560        let max_weight = self
1561            .contributing_factors
1562            .iter()
1563            .map(|f| f.weight)
1564            .fold(0.0, f64::max);
1565        features.push(max_weight);
1566
1567        // Factor type indicators (binary flags for key factor types)
1568        let has_control_bypass = self
1569            .contributing_factors
1570            .iter()
1571            .any(|f| f.factor_type == FactorType::ControlBypass);
1572        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1573
1574        let has_amount_deviation = self
1575            .contributing_factors
1576            .iter()
1577            .any(|f| f.factor_type == FactorType::AmountDeviation);
1578        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1579
1580        let has_timing = self
1581            .contributing_factors
1582            .iter()
1583            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1584        features.push(if has_timing { 1.0 } else { 0.0 });
1585
1586        let has_pattern_match = self
1587            .contributing_factors
1588            .iter()
1589            .any(|f| f.factor_type == FactorType::PatternMatch);
1590        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1591
1592        features
1593    }
1594
1595    /// Returns the number of features in the enhanced feature vector.
1596    pub fn feature_count() -> usize {
1597        25 // 15 base + 10 enhanced
1598    }
1599
1600    /// Returns feature names for the enhanced feature vector.
1601    pub fn feature_names() -> Vec<&'static str> {
1602        let mut names = LabeledAnomaly::feature_names();
1603        names.extend(vec![
1604            "enhanced_confidence",
1605            "enhanced_severity",
1606            "category_ordinal",
1607            "secondary_category_count",
1608            "contributing_factor_count",
1609            "max_factor_weight",
1610            "has_control_bypass",
1611            "has_amount_deviation",
1612            "has_timing_factor",
1613            "has_pattern_match",
1614        ]);
1615        names
1616    }
1617}
1618
1619// ============================================================================
1620// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1621// ============================================================================
1622
/// Severity level classification for anomalies.
///
/// Four ordered bands from `Low` to `Critical`. The derived `Default` is
/// `Medium`. The `impl` block converts to and from a 1-4 numeric value and a
/// normalized 0.0-1.0 score.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SeverityLevel {
    /// Minor issue, low impact.
    Low,
    /// Moderate issue, noticeable impact.
    #[default]
    Medium,
    /// Significant issue, substantial impact.
    High,
    /// Critical issue, severe impact requiring immediate attention.
    Critical,
}
1636
1637impl SeverityLevel {
1638    /// Returns the numeric value (1-4) for the severity level.
1639    pub fn numeric(&self) -> u8 {
1640        match self {
1641            SeverityLevel::Low => 1,
1642            SeverityLevel::Medium => 2,
1643            SeverityLevel::High => 3,
1644            SeverityLevel::Critical => 4,
1645        }
1646    }
1647
1648    /// Creates a severity level from a numeric value.
1649    pub fn from_numeric(value: u8) -> Self {
1650        match value {
1651            1 => SeverityLevel::Low,
1652            2 => SeverityLevel::Medium,
1653            3 => SeverityLevel::High,
1654            _ => SeverityLevel::Critical,
1655        }
1656    }
1657
1658    /// Creates a severity level from a normalized score (0.0-1.0).
1659    pub fn from_score(score: f64) -> Self {
1660        match score {
1661            s if s < 0.25 => SeverityLevel::Low,
1662            s if s < 0.50 => SeverityLevel::Medium,
1663            s if s < 0.75 => SeverityLevel::High,
1664            _ => SeverityLevel::Critical,
1665        }
1666    }
1667
1668    /// Returns a normalized score (0.0-1.0) for this severity level.
1669    pub fn to_score(&self) -> f64 {
1670        match self {
1671            SeverityLevel::Low => 0.125,
1672            SeverityLevel::Medium => 0.375,
1673            SeverityLevel::High => 0.625,
1674            SeverityLevel::Critical => 0.875,
1675        }
1676    }
1677}
1678
/// Structured severity scoring for anomalies.
///
/// Combines a discrete level, a continuous score, the financial impact and a
/// materiality flag. Use `AnomalySeverity::new` or `AnomalySeverity::from_score`
/// to build one and `with_materiality` to evaluate it against a threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalySeverity {
    /// Severity level classification.
    pub level: SeverityLevel,
    /// Continuous severity score (0.0-1.0).
    pub score: f64,
    /// Financial impact amount. The value is stored as given; `with_materiality`
    /// compares its absolute value against the threshold.
    pub financial_impact: Decimal,
    /// Whether this exceeds materiality threshold. Stays `false` until
    /// `with_materiality` is called.
    pub is_material: bool,
    /// Materiality threshold used for determination.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub materiality_threshold: Option<Decimal>,
}
1694
1695impl AnomalySeverity {
1696    /// Creates a new severity assessment.
1697    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1698        Self {
1699            level,
1700            score: level.to_score(),
1701            financial_impact,
1702            is_material: false,
1703            materiality_threshold: None,
1704        }
1705    }
1706
1707    /// Creates severity from a score, auto-determining level.
1708    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1709        Self {
1710            level: SeverityLevel::from_score(score),
1711            score: score.clamp(0.0, 1.0),
1712            financial_impact,
1713            is_material: false,
1714            materiality_threshold: None,
1715        }
1716    }
1717
1718    /// Sets the materiality assessment.
1719    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1720        self.materiality_threshold = Some(threshold);
1721        self.is_material = self.financial_impact.abs() >= threshold;
1722        self
1723    }
1724}
1725
1726impl Default for AnomalySeverity {
1727    fn default() -> Self {
1728        Self {
1729            level: SeverityLevel::Medium,
1730            score: 0.5,
1731            financial_impact: Decimal::ZERO,
1732            is_material: false,
1733            materiality_threshold: None,
1734        }
1735    }
1736}
1737
1738/// Detection difficulty classification for anomalies.
1739///
1740/// Categorizes how difficult an anomaly is to detect, which is useful
1741/// for ML model benchmarking and audit procedure selection.
1742///
1743/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
1744/// is used for drift event classification and has different variants.
1745#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1746pub enum AnomalyDetectionDifficulty {
1747    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
1748    Trivial,
1749    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
1750    Easy,
1751    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
1752    #[default]
1753    Moderate,
1754    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
1755    Hard,
1756    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
1757    Expert,
1758}
1759
1760impl AnomalyDetectionDifficulty {
1761    /// Returns the expected detection rate for this difficulty level.
1762    pub fn expected_detection_rate(&self) -> f64 {
1763        match self {
1764            AnomalyDetectionDifficulty::Trivial => 0.99,
1765            AnomalyDetectionDifficulty::Easy => 0.90,
1766            AnomalyDetectionDifficulty::Moderate => 0.70,
1767            AnomalyDetectionDifficulty::Hard => 0.40,
1768            AnomalyDetectionDifficulty::Expert => 0.15,
1769        }
1770    }
1771
1772    /// Returns a numeric difficulty score (0.0-1.0).
1773    pub fn difficulty_score(&self) -> f64 {
1774        match self {
1775            AnomalyDetectionDifficulty::Trivial => 0.05,
1776            AnomalyDetectionDifficulty::Easy => 0.25,
1777            AnomalyDetectionDifficulty::Moderate => 0.50,
1778            AnomalyDetectionDifficulty::Hard => 0.75,
1779            AnomalyDetectionDifficulty::Expert => 0.95,
1780        }
1781    }
1782
1783    /// Creates a difficulty level from a score (0.0-1.0).
1784    pub fn from_score(score: f64) -> Self {
1785        match score {
1786            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1787            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1788            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1789            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1790            _ => AnomalyDetectionDifficulty::Expert,
1791        }
1792    }
1793
1794    /// Returns the name of this difficulty level.
1795    pub fn name(&self) -> &'static str {
1796        match self {
1797            AnomalyDetectionDifficulty::Trivial => "trivial",
1798            AnomalyDetectionDifficulty::Easy => "easy",
1799            AnomalyDetectionDifficulty::Moderate => "moderate",
1800            AnomalyDetectionDifficulty::Hard => "hard",
1801            AnomalyDetectionDifficulty::Expert => "expert",
1802        }
1803    }
1804}
1805
1806/// Ground truth certainty level for anomaly labels.
1807///
1808/// Indicates how certain we are that the label is correct.
1809#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1810pub enum GroundTruthCertainty {
1811    /// Definitively known (injected anomaly with full provenance).
1812    #[default]
1813    Definite,
1814    /// Highly probable based on strong evidence.
1815    Probable,
1816    /// Possibly an anomaly based on indirect evidence.
1817    Possible,
1818}
1819
1820impl GroundTruthCertainty {
1821    /// Returns a certainty score (0.0-1.0).
1822    pub fn certainty_score(&self) -> f64 {
1823        match self {
1824            GroundTruthCertainty::Definite => 1.0,
1825            GroundTruthCertainty::Probable => 0.8,
1826            GroundTruthCertainty::Possible => 0.5,
1827        }
1828    }
1829
1830    /// Returns the name of this certainty level.
1831    pub fn name(&self) -> &'static str {
1832        match self {
1833            GroundTruthCertainty::Definite => "definite",
1834            GroundTruthCertainty::Probable => "probable",
1835            GroundTruthCertainty::Possible => "possible",
1836        }
1837    }
1838}
1839
1840/// Detection method classification.
1841///
1842/// Indicates which detection methods are recommended or effective for an anomaly.
1843#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1844pub enum DetectionMethod {
1845    /// Simple rule-based detection (thresholds, filters).
1846    RuleBased,
1847    /// Statistical analysis (distributions, outlier detection).
1848    Statistical,
1849    /// Machine learning models (classification, anomaly detection).
1850    MachineLearning,
1851    /// Graph-based analysis (network patterns, relationships).
1852    GraphBased,
1853    /// Manual forensic audit procedures.
1854    ForensicAudit,
1855    /// Combination of multiple methods.
1856    Hybrid,
1857}
1858
1859impl DetectionMethod {
1860    /// Returns the name of this detection method.
1861    pub fn name(&self) -> &'static str {
1862        match self {
1863            DetectionMethod::RuleBased => "rule_based",
1864            DetectionMethod::Statistical => "statistical",
1865            DetectionMethod::MachineLearning => "machine_learning",
1866            DetectionMethod::GraphBased => "graph_based",
1867            DetectionMethod::ForensicAudit => "forensic_audit",
1868            DetectionMethod::Hybrid => "hybrid",
1869        }
1870    }
1871
1872    /// Returns a description of this detection method.
1873    pub fn description(&self) -> &'static str {
1874        match self {
1875            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1876            DetectionMethod::Statistical => "Statistical distribution analysis",
1877            DetectionMethod::MachineLearning => "ML classification models",
1878            DetectionMethod::GraphBased => "Network and relationship analysis",
1879            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1880            DetectionMethod::Hybrid => "Combined multi-method approach",
1881        }
1882    }
1883}
1884
1885/// Extended anomaly label with comprehensive multi-dimensional classification.
1886///
1887/// This extends the base `EnhancedAnomalyLabel` with additional fields for
1888/// severity scoring, detection difficulty, recommended methods, and ground truth.
1889#[derive(Debug, Clone, Serialize, Deserialize)]
1890pub struct ExtendedAnomalyLabel {
1891    /// Base labeled anomaly.
1892    pub base: LabeledAnomaly,
1893    /// Enhanced category classification.
1894    pub category: AnomalyCategory,
1895    /// Structured severity assessment.
1896    pub severity: AnomalySeverity,
1897    /// Detection difficulty classification.
1898    pub detection_difficulty: AnomalyDetectionDifficulty,
1899    /// Recommended detection methods for this anomaly.
1900    pub recommended_methods: Vec<DetectionMethod>,
1901    /// Key indicators that should trigger detection.
1902    pub key_indicators: Vec<String>,
1903    /// Ground truth certainty level.
1904    pub ground_truth_certainty: GroundTruthCertainty,
1905    /// Contributing factors to confidence/severity.
1906    pub contributing_factors: Vec<ContributingFactor>,
1907    /// Related entity IDs (vendors, customers, employees, etc.).
1908    pub related_entity_ids: Vec<String>,
1909    /// Secondary categories for multi-label classification.
1910    pub secondary_categories: Vec<AnomalyCategory>,
1911    /// Scheme ID if part of a multi-stage fraud scheme.
1912    #[serde(default, skip_serializing_if = "Option::is_none")]
1913    pub scheme_id: Option<String>,
1914    /// Stage number within a scheme (1-indexed).
1915    #[serde(default, skip_serializing_if = "Option::is_none")]
1916    pub scheme_stage: Option<u32>,
1917    /// Whether this is a near-miss (suspicious but legitimate).
1918    #[serde(default)]
1919    pub is_near_miss: bool,
1920    /// Explanation if this is a near-miss.
1921    #[serde(default, skip_serializing_if = "Option::is_none")]
1922    pub near_miss_explanation: Option<String>,
1923}
1924
1925impl ExtendedAnomalyLabel {
1926    /// Creates an extended label from a base labeled anomaly.
1927    pub fn from_base(base: LabeledAnomaly) -> Self {
1928        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1929        let severity = AnomalySeverity {
1930            level: SeverityLevel::from_numeric(base.severity),
1931            score: base.severity as f64 / 5.0,
1932            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1933            is_material: false,
1934            materiality_threshold: None,
1935        };
1936
1937        Self {
1938            base,
1939            category,
1940            severity,
1941            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1942            recommended_methods: vec![DetectionMethod::RuleBased],
1943            key_indicators: Vec::new(),
1944            ground_truth_certainty: GroundTruthCertainty::Definite,
1945            contributing_factors: Vec::new(),
1946            related_entity_ids: Vec::new(),
1947            secondary_categories: Vec::new(),
1948            scheme_id: None,
1949            scheme_stage: None,
1950            is_near_miss: false,
1951            near_miss_explanation: None,
1952        }
1953    }
1954
1955    /// Sets the severity assessment.
1956    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1957        self.severity = severity;
1958        self
1959    }
1960
1961    /// Sets the detection difficulty.
1962    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1963        self.detection_difficulty = difficulty;
1964        self
1965    }
1966
1967    /// Adds a recommended detection method.
1968    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1969        if !self.recommended_methods.contains(&method) {
1970            self.recommended_methods.push(method);
1971        }
1972        self
1973    }
1974
1975    /// Sets the recommended detection methods.
1976    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1977        self.recommended_methods = methods;
1978        self
1979    }
1980
1981    /// Adds a key indicator.
1982    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1983        self.key_indicators.push(indicator.into());
1984        self
1985    }
1986
1987    /// Sets the ground truth certainty.
1988    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1989        self.ground_truth_certainty = certainty;
1990        self
1991    }
1992
1993    /// Adds a contributing factor.
1994    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1995        self.contributing_factors.push(factor);
1996        self
1997    }
1998
1999    /// Adds a related entity ID.
2000    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
2001        self.related_entity_ids.push(entity_id.into());
2002        self
2003    }
2004
2005    /// Adds a secondary category.
2006    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2007        if category != self.category && !self.secondary_categories.contains(&category) {
2008            self.secondary_categories.push(category);
2009        }
2010        self
2011    }
2012
2013    /// Sets scheme information.
2014    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2015        self.scheme_id = Some(scheme_id.into());
2016        self.scheme_stage = Some(stage);
2017        self
2018    }
2019
2020    /// Marks this as a near-miss with explanation.
2021    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2022        self.is_near_miss = true;
2023        self.near_miss_explanation = Some(explanation.into());
2024        self
2025    }
2026
2027    /// Converts to an extended feature vector for ML.
2028    ///
2029    /// Returns base features (15) + extended features (15) = 30 features.
2030    pub fn to_features(&self) -> Vec<f64> {
2031        let mut features = self.base.to_features();
2032
2033        // Extended features
2034        features.push(self.severity.score);
2035        features.push(self.severity.level.to_score());
2036        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2037        features.push(self.detection_difficulty.difficulty_score());
2038        features.push(self.detection_difficulty.expected_detection_rate());
2039        features.push(self.ground_truth_certainty.certainty_score());
2040        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2041        features.push(self.secondary_categories.len() as f64);
2042        features.push(self.contributing_factors.len() as f64);
2043        features.push(self.key_indicators.len() as f64);
2044        features.push(self.recommended_methods.len() as f64);
2045        features.push(self.related_entity_ids.len() as f64);
2046        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2047        features.push(self.scheme_stage.unwrap_or(0) as f64);
2048        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2049
2050        features
2051    }
2052
2053    /// Returns the number of features in the extended feature vector.
2054    pub fn feature_count() -> usize {
2055        30 // 15 base + 15 extended
2056    }
2057
2058    /// Returns feature names for the extended feature vector.
2059    pub fn feature_names() -> Vec<&'static str> {
2060        let mut names = LabeledAnomaly::feature_names();
2061        names.extend(vec![
2062            "severity_score",
2063            "severity_level_score",
2064            "is_material",
2065            "difficulty_score",
2066            "expected_detection_rate",
2067            "ground_truth_certainty",
2068            "category_ordinal",
2069            "secondary_category_count",
2070            "contributing_factor_count",
2071            "key_indicator_count",
2072            "recommended_method_count",
2073            "related_entity_count",
2074            "is_part_of_scheme",
2075            "scheme_stage",
2076            "is_near_miss",
2077        ]);
2078        names
2079    }
2080}
2081
// ============================================================================
// MULTI-STAGE FRAUD SCHEME TYPES
// ============================================================================
2085
2086/// Type of multi-stage fraud scheme.
2087#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2088pub enum SchemeType {
2089    /// Gradual embezzlement over time.
2090    GradualEmbezzlement,
2091    /// Revenue manipulation across periods.
2092    RevenueManipulation,
2093    /// Vendor kickback scheme.
2094    VendorKickback,
2095    /// Round-tripping funds through multiple entities.
2096    RoundTripping,
2097    /// Ghost employee scheme.
2098    GhostEmployee,
2099    /// Expense reimbursement fraud.
2100    ExpenseReimbursement,
2101    /// Inventory theft scheme.
2102    InventoryTheft,
2103    /// Custom scheme type.
2104    Custom,
2105}
2106
2107impl SchemeType {
2108    /// Returns the name of this scheme type.
2109    pub fn name(&self) -> &'static str {
2110        match self {
2111            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2112            SchemeType::RevenueManipulation => "revenue_manipulation",
2113            SchemeType::VendorKickback => "vendor_kickback",
2114            SchemeType::RoundTripping => "round_tripping",
2115            SchemeType::GhostEmployee => "ghost_employee",
2116            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2117            SchemeType::InventoryTheft => "inventory_theft",
2118            SchemeType::Custom => "custom",
2119        }
2120    }
2121
2122    /// Returns the typical number of stages for this scheme type.
2123    pub fn typical_stages(&self) -> u32 {
2124        match self {
2125            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2126            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2127            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2128            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2129            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2130            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2131            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2132            SchemeType::Custom => 4,
2133        }
2134    }
2135}
2136
2137/// Status of detection for a fraud scheme.
2138#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2139pub enum SchemeDetectionStatus {
2140    /// Scheme is undetected.
2141    #[default]
2142    Undetected,
2143    /// Under investigation but not confirmed.
2144    UnderInvestigation,
2145    /// Partially detected (some transactions flagged).
2146    PartiallyDetected,
2147    /// Fully detected and confirmed.
2148    FullyDetected,
2149}
2150
2151/// Reference to a transaction within a scheme.
2152#[derive(Debug, Clone, Serialize, Deserialize)]
2153pub struct SchemeTransactionRef {
2154    /// Document ID of the transaction.
2155    pub document_id: String,
2156    /// Transaction date.
2157    pub date: chrono::NaiveDate,
2158    /// Transaction amount.
2159    pub amount: Decimal,
2160    /// Stage this transaction belongs to.
2161    pub stage: u32,
2162    /// Anomaly ID if labeled.
2163    #[serde(default, skip_serializing_if = "Option::is_none")]
2164    pub anomaly_id: Option<String>,
2165}
2166
2167/// Concealment technique used in fraud.
2168#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2169pub enum ConcealmentTechnique {
2170    /// Document manipulation or forgery.
2171    DocumentManipulation,
2172    /// Circumventing approval processes.
2173    ApprovalCircumvention,
2174    /// Exploiting timing (period-end, holidays).
2175    TimingExploitation,
2176    /// Transaction splitting to avoid thresholds.
2177    TransactionSplitting,
2178    /// Account misclassification.
2179    AccountMisclassification,
2180    /// Collusion with other employees.
2181    Collusion,
2182    /// Data alteration or deletion.
2183    DataAlteration,
2184    /// Creating false documentation.
2185    FalseDocumentation,
2186}
2187
2188impl ConcealmentTechnique {
2189    /// Returns the difficulty bonus this technique adds.
2190    pub fn difficulty_bonus(&self) -> f64 {
2191        match self {
2192            ConcealmentTechnique::DocumentManipulation => 0.20,
2193            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2194            ConcealmentTechnique::TimingExploitation => 0.10,
2195            ConcealmentTechnique::TransactionSplitting => 0.15,
2196            ConcealmentTechnique::AccountMisclassification => 0.10,
2197            ConcealmentTechnique::Collusion => 0.25,
2198            ConcealmentTechnique::DataAlteration => 0.20,
2199            ConcealmentTechnique::FalseDocumentation => 0.15,
2200        }
2201    }
2202}
2203
// ============================================================================
// ACFE-ALIGNED FRAUD TAXONOMY
// ============================================================================
//
// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
// Nations: Occupational Fraud Classification System. This taxonomy provides
// ACFE-aligned categories, schemes, and calibration data.
2211
2212/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
2213///
2214/// ACFE Report to the Nations statistics (typical):
2215/// - Asset Misappropriation: 86% of cases, $100k median loss
2216/// - Corruption: 33% of cases, $150k median loss
2217/// - Financial Statement Fraud: 10% of cases, $954k median loss
2218///
2219/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
2220#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2221pub enum AcfeFraudCategory {
2222    /// Theft of organizational assets (cash, inventory, equipment).
2223    /// Most common (86% of cases) but typically lowest median loss ($100k).
2224    #[default]
2225    AssetMisappropriation,
2226    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
2227    /// Medium frequency (33% of cases), medium median loss ($150k).
2228    Corruption,
2229    /// Intentional misstatement of financial statements.
2230    /// Least common (10% of cases) but highest median loss ($954k).
2231    FinancialStatementFraud,
2232}
2233
2234impl AcfeFraudCategory {
2235    /// Returns the name of this category.
2236    pub fn name(&self) -> &'static str {
2237        match self {
2238            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2239            AcfeFraudCategory::Corruption => "corruption",
2240            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2241        }
2242    }
2243
2244    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2245    pub fn typical_occurrence_rate(&self) -> f64 {
2246        match self {
2247            AcfeFraudCategory::AssetMisappropriation => 0.86,
2248            AcfeFraudCategory::Corruption => 0.33,
2249            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2250        }
2251    }
2252
2253    /// Returns the typical median loss amount (from ACFE reports).
2254    pub fn typical_median_loss(&self) -> Decimal {
2255        match self {
2256            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2257            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2258            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2259        }
2260    }
2261
2262    /// Returns the typical detection time in months (from ACFE reports).
2263    pub fn typical_detection_months(&self) -> u32 {
2264        match self {
2265            AcfeFraudCategory::AssetMisappropriation => 12,
2266            AcfeFraudCategory::Corruption => 18,
2267            AcfeFraudCategory::FinancialStatementFraud => 24,
2268        }
2269    }
2270}
2271
2272/// Cash-based fraud schemes under Asset Misappropriation.
2273///
2274/// Organized according to the ACFE Fraud Tree:
2275/// - Theft of Cash on Hand
2276/// - Theft of Cash Receipts
2277/// - Fraudulent Disbursements
2278#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2279pub enum CashFraudScheme {
2280    // ========== Theft of Cash on Hand ==========
2281    /// Stealing cash from cash drawers or safes after it has been recorded.
2282    Larceny,
2283    /// Stealing cash before it is recorded in the books (intercepts receipts).
2284    Skimming,
2285
2286    // ========== Theft of Cash Receipts ==========
2287    /// Skimming from sales transactions before recording.
2288    SalesSkimming,
2289    /// Intercepting customer payments on accounts receivable.
2290    ReceivablesSkimming,
2291    /// Creating false refunds to pocket the difference.
2292    RefundSchemes,
2293
2294    // ========== Fraudulent Disbursements - Billing Schemes ==========
2295    /// Creating fictitious vendors to invoice and pay.
2296    ShellCompany,
2297    /// Manipulating payments to legitimate vendors for personal gain.
2298    NonAccompliceVendor,
2299    /// Using company funds for personal purchases.
2300    PersonalPurchases,
2301
2302    // ========== Fraudulent Disbursements - Payroll Schemes ==========
2303    /// Creating fake employees to collect wages.
2304    GhostEmployee,
2305    /// Falsifying hours worked, sales commissions, or salary rates.
2306    FalsifiedWages,
2307    /// Manipulating commission calculations.
2308    CommissionSchemes,
2309
2310    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
2311    /// Claiming non-business expenses as business expenses.
2312    MischaracterizedExpenses,
2313    /// Inflating legitimate expense amounts.
2314    OverstatedExpenses,
2315    /// Creating completely fictitious expenses.
2316    FictitiousExpenses,
2317
2318    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
2319    /// Forging the signature of an authorized check signer.
2320    ForgedMaker,
2321    /// Intercepting and altering the endorsement on legitimate checks.
2322    ForgedEndorsement,
2323    /// Altering the payee on a legitimate check.
2324    AlteredPayee,
2325    /// Authorized signer writing checks for personal benefit.
2326    AuthorizedMaker,
2327
2328    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
2329    /// Creating false voided transactions.
2330    FalseVoids,
2331    /// Processing fictitious refunds.
2332    FalseRefunds,
2333}
2334
2335impl CashFraudScheme {
2336    /// Returns the ACFE category this scheme belongs to.
2337    pub fn category(&self) -> AcfeFraudCategory {
2338        AcfeFraudCategory::AssetMisappropriation
2339    }
2340
2341    /// Returns the subcategory within the ACFE Fraud Tree.
2342    pub fn subcategory(&self) -> &'static str {
2343        match self {
2344            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2345            CashFraudScheme::SalesSkimming
2346            | CashFraudScheme::ReceivablesSkimming
2347            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2348            CashFraudScheme::ShellCompany
2349            | CashFraudScheme::NonAccompliceVendor
2350            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2351            CashFraudScheme::GhostEmployee
2352            | CashFraudScheme::FalsifiedWages
2353            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2354            CashFraudScheme::MischaracterizedExpenses
2355            | CashFraudScheme::OverstatedExpenses
2356            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2357            CashFraudScheme::ForgedMaker
2358            | CashFraudScheme::ForgedEndorsement
2359            | CashFraudScheme::AlteredPayee
2360            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2361            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2362        }
2363    }
2364
2365    /// Returns the typical severity (1-5) for this scheme.
2366    pub fn severity(&self) -> u8 {
2367        match self {
2368            // Lower severity - often small amounts, easier to detect
2369            CashFraudScheme::FalseVoids
2370            | CashFraudScheme::FalseRefunds
2371            | CashFraudScheme::MischaracterizedExpenses => 3,
2372            // Medium severity
2373            CashFraudScheme::OverstatedExpenses
2374            | CashFraudScheme::Skimming
2375            | CashFraudScheme::Larceny
2376            | CashFraudScheme::PersonalPurchases
2377            | CashFraudScheme::FalsifiedWages => 4,
2378            // Higher severity - larger amounts, harder to detect
2379            CashFraudScheme::ShellCompany
2380            | CashFraudScheme::GhostEmployee
2381            | CashFraudScheme::FictitiousExpenses
2382            | CashFraudScheme::ForgedMaker
2383            | CashFraudScheme::AuthorizedMaker => 5,
2384            _ => 4,
2385        }
2386    }
2387
2388    /// Returns the typical detection difficulty.
2389    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2390        match self {
2391            // Easy to detect with basic controls
2392            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2393                AnomalyDetectionDifficulty::Easy
2394            }
2395            // Moderate - requires reconciliation
2396            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2397                AnomalyDetectionDifficulty::Moderate
2398            }
2399            // Hard - requires sophisticated analysis
2400            CashFraudScheme::Skimming
2401            | CashFraudScheme::ShellCompany
2402            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2403            // Expert level
2404            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2405                AnomalyDetectionDifficulty::Expert
2406            }
2407            _ => AnomalyDetectionDifficulty::Moderate,
2408        }
2409    }
2410
2411    /// Returns all variants for iteration.
2412    pub fn all_variants() -> &'static [CashFraudScheme] {
2413        &[
2414            CashFraudScheme::Larceny,
2415            CashFraudScheme::Skimming,
2416            CashFraudScheme::SalesSkimming,
2417            CashFraudScheme::ReceivablesSkimming,
2418            CashFraudScheme::RefundSchemes,
2419            CashFraudScheme::ShellCompany,
2420            CashFraudScheme::NonAccompliceVendor,
2421            CashFraudScheme::PersonalPurchases,
2422            CashFraudScheme::GhostEmployee,
2423            CashFraudScheme::FalsifiedWages,
2424            CashFraudScheme::CommissionSchemes,
2425            CashFraudScheme::MischaracterizedExpenses,
2426            CashFraudScheme::OverstatedExpenses,
2427            CashFraudScheme::FictitiousExpenses,
2428            CashFraudScheme::ForgedMaker,
2429            CashFraudScheme::ForgedEndorsement,
2430            CashFraudScheme::AlteredPayee,
2431            CashFraudScheme::AuthorizedMaker,
2432            CashFraudScheme::FalseVoids,
2433            CashFraudScheme::FalseRefunds,
2434        ]
2435    }
2436}
2437
/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
///
/// Variants fall into two groups, inventory schemes and other-asset schemes;
/// the section comments inside the enum mark where each group starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AssetFraudScheme {
    // ========== Inventory Schemes ==========
    /// Misusing or converting inventory for personal benefit.
    InventoryMisuse,
    /// Stealing physical inventory items.
    InventoryTheft,
    /// Manipulating purchasing to facilitate theft.
    InventoryPurchasingScheme,
    /// Manipulating receiving/shipping to steal inventory.
    InventoryReceivingScheme,

    // ========== Other Asset Schemes ==========
    /// Misusing company equipment or vehicles.
    EquipmentMisuse,
    /// Theft of company equipment, tools, or supplies.
    EquipmentTheft,
    /// Unauthorized access to or theft of intellectual property.
    IntellectualPropertyTheft,
    /// Using company time/resources for personal business.
    TimeTheft,
}
2461
2462impl AssetFraudScheme {
2463    /// Returns the ACFE category this scheme belongs to.
2464    pub fn category(&self) -> AcfeFraudCategory {
2465        AcfeFraudCategory::AssetMisappropriation
2466    }
2467
2468    /// Returns the subcategory within the ACFE Fraud Tree.
2469    pub fn subcategory(&self) -> &'static str {
2470        match self {
2471            AssetFraudScheme::InventoryMisuse
2472            | AssetFraudScheme::InventoryTheft
2473            | AssetFraudScheme::InventoryPurchasingScheme
2474            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2475            _ => "other_assets",
2476        }
2477    }
2478
2479    /// Returns the typical severity (1-5) for this scheme.
2480    pub fn severity(&self) -> u8 {
2481        match self {
2482            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2483            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2484            AssetFraudScheme::InventoryTheft
2485            | AssetFraudScheme::InventoryPurchasingScheme
2486            | AssetFraudScheme::InventoryReceivingScheme => 4,
2487            AssetFraudScheme::IntellectualPropertyTheft => 5,
2488        }
2489    }
2490}
2491
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// Variants are organized into four groups (conflicts of interest, bribery,
/// illegal gratuities, economic extortion), marked by the section comments
/// inside the enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2526
2527impl CorruptionScheme {
2528    /// Returns the ACFE category this scheme belongs to.
2529    pub fn category(&self) -> AcfeFraudCategory {
2530        AcfeFraudCategory::Corruption
2531    }
2532
2533    /// Returns the subcategory within the ACFE Fraud Tree.
2534    pub fn subcategory(&self) -> &'static str {
2535        match self {
2536            CorruptionScheme::PurchasingConflict
2537            | CorruptionScheme::SalesConflict
2538            | CorruptionScheme::OutsideBusinessInterest
2539            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2540            CorruptionScheme::InvoiceKickback
2541            | CorruptionScheme::BidRigging
2542            | CorruptionScheme::CashBribery
2543            | CorruptionScheme::PublicOfficial => "bribery",
2544            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2545            CorruptionScheme::EconomicExtortion => "economic_extortion",
2546        }
2547    }
2548
2549    /// Returns the typical severity (1-5) for this scheme.
2550    pub fn severity(&self) -> u8 {
2551        match self {
2552            // Lower severity conflicts of interest
2553            CorruptionScheme::NepotismConflict => 3,
2554            // Medium severity
2555            CorruptionScheme::PurchasingConflict
2556            | CorruptionScheme::SalesConflict
2557            | CorruptionScheme::OutsideBusinessInterest
2558            | CorruptionScheme::IllegalGratuity => 4,
2559            // High severity - active corruption
2560            CorruptionScheme::InvoiceKickback
2561            | CorruptionScheme::BidRigging
2562            | CorruptionScheme::CashBribery
2563            | CorruptionScheme::EconomicExtortion => 5,
2564            // Highest severity - involves public officials
2565            CorruptionScheme::PublicOfficial => 5,
2566        }
2567    }
2568
2569    /// Returns the typical detection difficulty.
2570    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2571        match self {
2572            // Easier to detect with proper disclosure requirements
2573            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2574                AnomalyDetectionDifficulty::Moderate
2575            }
2576            // Hard - requires transaction pattern analysis
2577            CorruptionScheme::PurchasingConflict
2578            | CorruptionScheme::SalesConflict
2579            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2580            // Expert level - deliberate concealment
2581            CorruptionScheme::InvoiceKickback
2582            | CorruptionScheme::CashBribery
2583            | CorruptionScheme::PublicOfficial
2584            | CorruptionScheme::IllegalGratuity
2585            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2586        }
2587    }
2588
2589    /// Returns all variants for iteration.
2590    pub fn all_variants() -> &'static [CorruptionScheme] {
2591        &[
2592            CorruptionScheme::PurchasingConflict,
2593            CorruptionScheme::SalesConflict,
2594            CorruptionScheme::OutsideBusinessInterest,
2595            CorruptionScheme::NepotismConflict,
2596            CorruptionScheme::InvoiceKickback,
2597            CorruptionScheme::BidRigging,
2598            CorruptionScheme::CashBribery,
2599            CorruptionScheme::PublicOfficial,
2600            CorruptionScheme::IllegalGratuity,
2601            CorruptionScheme::EconomicExtortion,
2602        ]
2603    }
2604}
2605
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// Variants are split into an overstatement group and an understatement
/// group; the section comments inside the enum mark where each group starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2642
2643impl FinancialStatementScheme {
2644    /// Returns the ACFE category this scheme belongs to.
2645    pub fn category(&self) -> AcfeFraudCategory {
2646        AcfeFraudCategory::FinancialStatementFraud
2647    }
2648
2649    /// Returns the subcategory within the ACFE Fraud Tree.
2650    pub fn subcategory(&self) -> &'static str {
2651        match self {
2652            FinancialStatementScheme::UnderstatedRevenues
2653            | FinancialStatementScheme::OverstatedExpenses
2654            | FinancialStatementScheme::OverstatedLiabilities
2655            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2656            _ => "overstatement",
2657        }
2658    }
2659
2660    /// Returns the typical severity (1-5) for this scheme.
2661    pub fn severity(&self) -> u8 {
2662        // All financial statement fraud is high severity
2663        5
2664    }
2665
2666    /// Returns the typical detection difficulty.
2667    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2668        match self {
2669            // Easier to detect with good analytics
2670            FinancialStatementScheme::ChannelStuffing
2671            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2672            // Hard - requires deep analysis
2673            FinancialStatementScheme::PrematureRevenue
2674            | FinancialStatementScheme::ImproperCapitalization
2675            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2676            // Expert level
2677            FinancialStatementScheme::FictitiousRevenues
2678            | FinancialStatementScheme::ConcealedLiabilities
2679            | FinancialStatementScheme::ImproperAssetValuations
2680            | FinancialStatementScheme::ImproperDisclosures
2681            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2682            _ => AnomalyDetectionDifficulty::Hard,
2683        }
2684    }
2685
2686    /// Returns all variants for iteration.
2687    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2688        &[
2689            FinancialStatementScheme::PrematureRevenue,
2690            FinancialStatementScheme::DelayedExpenses,
2691            FinancialStatementScheme::FictitiousRevenues,
2692            FinancialStatementScheme::ConcealedLiabilities,
2693            FinancialStatementScheme::ImproperAssetValuations,
2694            FinancialStatementScheme::ImproperDisclosures,
2695            FinancialStatementScheme::ChannelStuffing,
2696            FinancialStatementScheme::BillAndHold,
2697            FinancialStatementScheme::ImproperCapitalization,
2698            FinancialStatementScheme::UnderstatedRevenues,
2699            FinancialStatementScheme::OverstatedExpenses,
2700            FinancialStatementScheme::OverstatedLiabilities,
2701            FinancialStatementScheme::ImproperAssetWritedowns,
2702        ]
2703    }
2704}
2705
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the scheme enums, so a single value can carry
/// a scheme from any of them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2718
2719impl AcfeScheme {
2720    /// Returns the ACFE category this scheme belongs to.
2721    pub fn category(&self) -> AcfeFraudCategory {
2722        match self {
2723            AcfeScheme::Cash(s) => s.category(),
2724            AcfeScheme::Asset(s) => s.category(),
2725            AcfeScheme::Corruption(s) => s.category(),
2726            AcfeScheme::FinancialStatement(s) => s.category(),
2727        }
2728    }
2729
2730    /// Returns the severity (1-5) for this scheme.
2731    pub fn severity(&self) -> u8 {
2732        match self {
2733            AcfeScheme::Cash(s) => s.severity(),
2734            AcfeScheme::Asset(s) => s.severity(),
2735            AcfeScheme::Corruption(s) => s.severity(),
2736            AcfeScheme::FinancialStatement(s) => s.severity(),
2737        }
2738    }
2739
2740    /// Returns the detection difficulty for this scheme.
2741    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2742        match self {
2743            AcfeScheme::Cash(s) => s.detection_difficulty(),
2744            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2745            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2746            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2747        }
2748    }
2749}
2750
/// How a fraud was detected (from ACFE statistics).
///
/// Methods not covered by a dedicated variant fall under `Other`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2779
2780impl AcfeDetectionMethod {
2781    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2782    pub fn typical_detection_rate(&self) -> f64 {
2783        match self {
2784            AcfeDetectionMethod::Tip => 0.42,
2785            AcfeDetectionMethod::InternalAudit => 0.16,
2786            AcfeDetectionMethod::ManagementReview => 0.12,
2787            AcfeDetectionMethod::ExternalAudit => 0.04,
2788            AcfeDetectionMethod::AccountReconciliation => 0.05,
2789            AcfeDetectionMethod::DocumentExamination => 0.04,
2790            AcfeDetectionMethod::ByAccident => 0.06,
2791            AcfeDetectionMethod::ItControls => 0.03,
2792            AcfeDetectionMethod::Surveillance => 0.02,
2793            AcfeDetectionMethod::Confession => 0.02,
2794            AcfeDetectionMethod::LawEnforcement => 0.01,
2795            AcfeDetectionMethod::Other => 0.03,
2796        }
2797    }
2798
2799    /// Returns all variants for iteration.
2800    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2801        &[
2802            AcfeDetectionMethod::Tip,
2803            AcfeDetectionMethod::InternalAudit,
2804            AcfeDetectionMethod::ManagementReview,
2805            AcfeDetectionMethod::ExternalAudit,
2806            AcfeDetectionMethod::AccountReconciliation,
2807            AcfeDetectionMethod::DocumentExamination,
2808            AcfeDetectionMethod::ByAccident,
2809            AcfeDetectionMethod::ItControls,
2810            AcfeDetectionMethod::Surveillance,
2811            AcfeDetectionMethod::Confession,
2812            AcfeDetectionMethod::LawEnforcement,
2813            AcfeDetectionMethod::Other,
2814        ]
2815    }
2816}
2817
/// Department/position of perpetrator (from ACFE statistics).
///
/// Departments not covered by a dedicated variant fall under `Other`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
2846
2847impl PerpetratorDepartment {
2848    /// Returns the typical percentage of frauds by department (from ACFE reports).
2849    pub fn typical_occurrence_rate(&self) -> f64 {
2850        match self {
2851            PerpetratorDepartment::Accounting => 0.21,
2852            PerpetratorDepartment::Operations => 0.17,
2853            PerpetratorDepartment::Executive => 0.12,
2854            PerpetratorDepartment::Sales => 0.11,
2855            PerpetratorDepartment::CustomerService => 0.07,
2856            PerpetratorDepartment::Purchasing => 0.06,
2857            PerpetratorDepartment::It => 0.05,
2858            PerpetratorDepartment::HumanResources => 0.04,
2859            PerpetratorDepartment::Administrative => 0.04,
2860            PerpetratorDepartment::Warehouse => 0.03,
2861            PerpetratorDepartment::BoardOfDirectors => 0.02,
2862            PerpetratorDepartment::Other => 0.08,
2863        }
2864    }
2865
2866    /// Returns the typical median loss by perpetrator department.
2867    pub fn typical_median_loss(&self) -> Decimal {
2868        match self {
2869            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2870            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2871            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2872            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2873            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2874            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2875            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2876            _ => Decimal::new(80_000, 0),
2877        }
2878    }
2879}
2880
/// Perpetrator position level (from ACFE statistics).
///
/// Three levels are distinguished, from entry-level staff up to owners and
/// executives.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
2891
2892impl PerpetratorLevel {
2893    /// Returns the typical percentage of frauds by position level.
2894    pub fn typical_occurrence_rate(&self) -> f64 {
2895        match self {
2896            PerpetratorLevel::Employee => 0.42,
2897            PerpetratorLevel::Manager => 0.36,
2898            PerpetratorLevel::OwnerExecutive => 0.22,
2899        }
2900    }
2901
2902    /// Returns the typical median loss by position level.
2903    pub fn typical_median_loss(&self) -> Decimal {
2904        match self {
2905            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2906            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2907            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2908        }
2909    }
2910}
2911
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// The distribution maps are keyed by plain string labels and hold fractional
/// shares. They are not necessarily normalized: the category shares supplied
/// by the `Default` implementation (0.86, 0.33, 0.10) add up to more than 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Percentage of frauds involving collusion, stored as a fraction
    /// (validation requires it to lie between 0.0 and 1.0).
    pub collusion_rate: f64,
}
2935
2936impl Default for AcfeCalibration {
2937    fn default() -> Self {
2938        let mut category_distribution = HashMap::new();
2939        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2940        category_distribution.insert("corruption".to_string(), 0.33);
2941        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2942
2943        let mut detection_method_distribution = HashMap::new();
2944        for method in AcfeDetectionMethod::all_variants() {
2945            detection_method_distribution.insert(
2946                format!("{method:?}").to_lowercase(),
2947                method.typical_detection_rate(),
2948            );
2949        }
2950
2951        let mut department_distribution = HashMap::new();
2952        department_distribution.insert("accounting".to_string(), 0.21);
2953        department_distribution.insert("operations".to_string(), 0.17);
2954        department_distribution.insert("executive".to_string(), 0.12);
2955        department_distribution.insert("sales".to_string(), 0.11);
2956        department_distribution.insert("customer_service".to_string(), 0.07);
2957        department_distribution.insert("purchasing".to_string(), 0.06);
2958        department_distribution.insert("other".to_string(), 0.26);
2959
2960        let mut level_distribution = HashMap::new();
2961        level_distribution.insert("employee".to_string(), 0.42);
2962        level_distribution.insert("manager".to_string(), 0.36);
2963        level_distribution.insert("owner_executive".to_string(), 0.22);
2964
2965        Self {
2966            median_loss: Decimal::new(117_000, 0),
2967            median_duration_months: 12,
2968            category_distribution,
2969            detection_method_distribution,
2970            department_distribution,
2971            level_distribution,
2972            avg_red_flags_per_case: 2.8,
2973            collusion_rate: 0.50,
2974        }
2975    }
2976}
2977
2978impl AcfeCalibration {
2979    /// Creates a new ACFE calibration with the given parameters.
2980    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2981        Self {
2982            median_loss,
2983            median_duration_months,
2984            ..Self::default()
2985        }
2986    }
2987
2988    /// Returns the median loss for a specific category.
2989    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2990        category.typical_median_loss()
2991    }
2992
2993    /// Returns the median duration for a specific category.
2994    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
2995        category.typical_detection_months()
2996    }
2997
2998    /// Validates the calibration data.
2999    pub fn validate(&self) -> Result<(), String> {
3000        if self.median_loss <= Decimal::ZERO {
3001            return Err("Median loss must be positive".to_string());
3002        }
3003        if self.median_duration_months == 0 {
3004            return Err("Median duration must be at least 1 month".to_string());
3005        }
3006        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3007            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3008        }
3009        Ok(())
3010    }
3011}
3012
/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
///
/// The fraud triangle is a model for explaining the factors that cause
/// someone to commit occupational fraud.
///
/// A triangle holds exactly one pressure and one rationalization, but any
/// number of opportunity factors (the list may be empty).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudTriangle {
    /// Pressure or incentive to commit fraud.
    pub pressure: PressureType,
    /// Opportunity factors that enable fraud.
    pub opportunities: Vec<OpportunityFactor>,
    /// Rationalization used to justify the fraud.
    pub rationalization: Rationalization,
}
3026
3027impl FraudTriangle {
3028    /// Creates a new fraud triangle.
3029    pub fn new(
3030        pressure: PressureType,
3031        opportunities: Vec<OpportunityFactor>,
3032        rationalization: Rationalization,
3033    ) -> Self {
3034        Self {
3035            pressure,
3036            opportunities,
3037            rationalization,
3038        }
3039    }
3040
3041    /// Returns a risk score based on the fraud triangle components.
3042    pub fn risk_score(&self) -> f64 {
3043        let pressure_score = self.pressure.risk_weight();
3044        let opportunity_score: f64 = self
3045            .opportunities
3046            .iter()
3047            .map(OpportunityFactor::risk_weight)
3048            .sum::<f64>()
3049            / self.opportunities.len().max(1) as f64;
3050        let rationalization_score = self.rationalization.risk_weight();
3051
3052        (pressure_score + opportunity_score + rationalization_score) / 3.0
3053    }
3054}
3055
/// Types of pressure/incentive that can lead to fraud.
///
/// Variants are listed in two groups, financial pressures first and
/// non-financial pressures second, as marked by the comments inside the enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PressureType {
    // Financial Pressures
    /// Personal financial difficulties (debt, lifestyle beyond means).
    PersonalFinancialDifficulties,
    /// Pressure to meet financial targets/earnings expectations.
    FinancialTargets,
    /// Market or analyst expectations.
    MarketExpectations,
    /// Debt covenant compliance requirements.
    CovenantCompliance,
    /// Credit rating maintenance.
    CreditRatingMaintenance,
    /// Acquisition/merger valuation pressure.
    AcquisitionValuation,

    // Non-Financial Pressures
    /// Fear of job loss.
    JobSecurity,
    /// Pressure to maintain status or image.
    StatusMaintenance,
    /// Gambling addiction.
    GamblingAddiction,
    /// Substance abuse issues.
    SubstanceAbuse,
    /// Family pressure or obligations.
    FamilyPressure,
    /// Greed or desire for more.
    Greed,
}
3087
3088impl PressureType {
3089    /// Returns the risk weight (0.0-1.0) for this pressure type.
3090    pub fn risk_weight(&self) -> f64 {
3091        match self {
3092            PressureType::PersonalFinancialDifficulties => 0.80,
3093            PressureType::FinancialTargets => 0.75,
3094            PressureType::MarketExpectations => 0.70,
3095            PressureType::CovenantCompliance => 0.85,
3096            PressureType::CreditRatingMaintenance => 0.70,
3097            PressureType::AcquisitionValuation => 0.75,
3098            PressureType::JobSecurity => 0.65,
3099            PressureType::StatusMaintenance => 0.55,
3100            PressureType::GamblingAddiction => 0.90,
3101            PressureType::SubstanceAbuse => 0.85,
3102            PressureType::FamilyPressure => 0.60,
3103            PressureType::Greed => 0.70,
3104        }
3105    }
3106}
3107
/// Opportunity factors that enable fraud.
///
/// A fraud triangle may list several of these at once.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OpportunityFactor {
    /// Weak internal controls.
    WeakInternalControls,
    /// Lack of segregation of duties.
    LackOfSegregation,
    /// Override capability.
    ManagementOverride,
    /// Complex or unusual transactions.
    ComplexTransactions,
    /// Related party transactions.
    RelatedPartyTransactions,
    /// Poor tone at the top.
    PoorToneAtTop,
    /// Inadequate supervision.
    InadequateSupervision,
    /// Access to assets without accountability.
    AssetAccess,
    /// Inadequate record keeping.
    PoorRecordKeeping,
    /// Failure to discipline fraud perpetrators.
    LackOfDiscipline,
    /// Lack of independent checks.
    LackOfIndependentChecks,
}
3134
3135impl OpportunityFactor {
3136    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3137    pub fn risk_weight(&self) -> f64 {
3138        match self {
3139            OpportunityFactor::WeakInternalControls => 0.85,
3140            OpportunityFactor::LackOfSegregation => 0.80,
3141            OpportunityFactor::ManagementOverride => 0.90,
3142            OpportunityFactor::ComplexTransactions => 0.70,
3143            OpportunityFactor::RelatedPartyTransactions => 0.75,
3144            OpportunityFactor::PoorToneAtTop => 0.85,
3145            OpportunityFactor::InadequateSupervision => 0.75,
3146            OpportunityFactor::AssetAccess => 0.70,
3147            OpportunityFactor::PoorRecordKeeping => 0.65,
3148            OpportunityFactor::LackOfDiscipline => 0.60,
3149            OpportunityFactor::LackOfIndependentChecks => 0.75,
3150        }
3151    }
3152}
3153
/// Rationalizations used by fraud perpetrators.
///
/// Each variant's documentation quotes the justification it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rationalization {
    /// "I'm just borrowing; I'll pay it back."
    TemporaryBorrowing,
    /// "Everyone does it."
    EveryoneDoesIt,
    /// "It's for the good of the company."
    ForTheCompanyGood,
    /// "I deserve this; the company owes me."
    Entitlement,
    /// "I was just following orders."
    FollowingOrders,
    /// "They won't miss it; they have plenty."
    TheyWontMissIt,
    /// "I need it more than they do."
    NeedItMore,
    /// "It's not really stealing."
    NotReallyStealing,
    /// "I'm underpaid for what I do."
    Underpaid,
    /// "It's a victimless crime."
    VictimlessCrime,
}
3178
3179impl Rationalization {
3180    /// Returns the risk weight (0.0-1.0) for this rationalization.
3181    pub fn risk_weight(&self) -> f64 {
3182        match self {
3183            // More dangerous rationalizations
3184            Rationalization::Entitlement => 0.85,
3185            Rationalization::EveryoneDoesIt => 0.80,
3186            Rationalization::NotReallyStealing => 0.80,
3187            Rationalization::TheyWontMissIt => 0.75,
3188            // Medium risk
3189            Rationalization::Underpaid => 0.70,
3190            Rationalization::ForTheCompanyGood => 0.65,
3191            Rationalization::NeedItMore => 0.65,
3192            // Lower risk (still indicates fraud)
3193            Rationalization::TemporaryBorrowing => 0.60,
3194            Rationalization::FollowingOrders => 0.55,
3195            Rationalization::VictimlessCrime => 0.60,
3196        }
3197    }
3198}
3199
3200// ============================================================================
3201// NEAR-MISS TYPES
3202// ============================================================================
3203
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Every variant is a struct-like variant carrying the details specific to
/// that pattern.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Date difference from similar transaction.
        date_difference_days: u32,
        /// Original transaction ID.
        similar_transaction_id: String,
    },
    /// Amount just below approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Percentage of threshold (0.0-1.0).
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Type of legitimate pattern.
        pattern_type: LegitimatePatternType,
        /// Business justification.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Days until correction.
        correction_lag_days: u32,
        /// Correction document ID.
        correction_document_id: String,
    },
}
3236
/// Types of unusual but legitimate business patterns.
///
/// Used as the `pattern_type` of [`NearMissPattern::UnusualLegitimate`];
/// `description()` returns a short human-readable text for each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3257
3258impl LegitimatePatternType {
3259    /// Returns a description of this pattern type.
3260    pub fn description(&self) -> &'static str {
3261        match self {
3262            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3263            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3264            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3265            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3266            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3267            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3268            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3269            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3270        }
3271    }
3272}
3273
/// What might trigger a false positive for this near-miss.
///
/// Stored on [`NearMissLabel`] as `false_positive_trigger`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FalsePositiveTrigger {
    /// Amount is near threshold.
    AmountNearThreshold,
    /// Timing is unusual.
    UnusualTiming,
    /// Similar to existing transaction.
    SimilarTransaction,
    /// New counterparty.
    NewCounterparty,
    /// Account combination unusual.
    UnusualAccountCombination,
    /// Volume spike.
    VolumeSpike,
    /// Round amount.
    RoundAmount,
}
3292
/// Label for a near-miss case.
///
/// All fields are public. `NearMissLabel::new` clamps `suspicion_score` into
/// 0.0-1.0; constructing the struct directly performs no such clamping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearMissLabel {
    /// Document ID.
    pub document_id: String,
    /// The near-miss pattern.
    pub pattern: NearMissPattern,
    /// How suspicious it appears (0.0-1.0).
    pub suspicion_score: f64,
    /// What would trigger a false positive.
    pub false_positive_trigger: FalsePositiveTrigger,
    /// Why this is actually legitimate.
    pub explanation: String,
}
3307
3308impl NearMissLabel {
3309    /// Creates a new near-miss label.
3310    pub fn new(
3311        document_id: impl Into<String>,
3312        pattern: NearMissPattern,
3313        suspicion_score: f64,
3314        trigger: FalsePositiveTrigger,
3315        explanation: impl Into<String>,
3316    ) -> Self {
3317        Self {
3318            document_id: document_id.into(),
3319            pattern,
3320            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3321            false_positive_trigger: trigger,
3322            explanation: explanation.into(),
3323        }
3324    }
3325}
3326
/// Configuration for anomaly rates.
///
/// `total_rate` is the overall share of anomalous items; the remaining fields
/// describe how anomalies are split across categories. `validate` checks that
/// the five category rates sum to approximately 1.0 and that `total_rate` lies
/// between 0.0 and 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate (0.0 - 1.0).
    pub total_rate: f64,
    /// Fraud rate as proportion of anomalies.
    pub fraud_rate: f64,
    /// Error rate as proportion of anomalies.
    pub error_rate: f64,
    /// Process issue rate as proportion of anomalies.
    pub process_issue_rate: f64,
    /// Statistical anomaly rate as proportion of anomalies.
    pub statistical_rate: f64,
    /// Relational anomaly rate as proportion of anomalies.
    pub relational_rate: f64,
}
3343
3344impl Default for AnomalyRateConfig {
3345    fn default() -> Self {
3346        Self {
3347            total_rate: 0.02,         // 2% of transactions are anomalous
3348            fraud_rate: 0.25,         // 25% of anomalies are fraud
3349            error_rate: 0.35,         // 35% of anomalies are errors
3350            process_issue_rate: 0.20, // 20% are process issues
3351            statistical_rate: 0.15,   // 15% are statistical
3352            relational_rate: 0.05,    // 5% are relational
3353        }
3354    }
3355}
3356
3357impl AnomalyRateConfig {
3358    /// Validates that rates sum to approximately 1.0.
3359    pub fn validate(&self) -> Result<(), String> {
3360        let sum = self.fraud_rate
3361            + self.error_rate
3362            + self.process_issue_rate
3363            + self.statistical_rate
3364            + self.relational_rate;
3365
3366        if (sum - 1.0).abs() > 0.01 {
3367            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3368        }
3369
3370        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3371            return Err(format!(
3372                "Total rate must be between 0.0 and 1.0, got {}",
3373                self.total_rate
3374            ));
3375        }
3376
3377        Ok(())
3378    }
3379}
3380
3381#[cfg(test)]
3382#[allow(clippy::unwrap_used)]
3383mod tests {
3384    use super::*;
3385    use rust_decimal_macros::dec;
3386
3387    #[test]
3388    fn test_anomaly_type_category() {
3389        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3390        assert_eq!(fraud.category(), "Fraud");
3391        assert!(fraud.is_intentional());
3392
3393        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3394        assert_eq!(error.category(), "Error");
3395        assert!(!error.is_intentional());
3396    }
3397
3398    #[test]
3399    fn test_labeled_anomaly() {
3400        let anomaly = LabeledAnomaly::new(
3401            "ANO001".to_string(),
3402            AnomalyType::Fraud(FraudType::SelfApproval),
3403            "JE001".to_string(),
3404            "JE".to_string(),
3405            "1000".to_string(),
3406            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3407        )
3408        .with_description("User approved their own expense report")
3409        .with_related_entity("USER001");
3410
3411        assert_eq!(anomaly.severity, 3);
3412        assert!(anomaly.is_injected);
3413        assert_eq!(anomaly.related_entities.len(), 1);
3414    }
3415
3416    #[test]
3417    fn test_labeled_anomaly_with_provenance() {
3418        let anomaly = LabeledAnomaly::new(
3419            "ANO001".to_string(),
3420            AnomalyType::Fraud(FraudType::SelfApproval),
3421            "JE001".to_string(),
3422            "JE".to_string(),
3423            "1000".to_string(),
3424            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3425        )
3426        .with_run_id("run-123")
3427        .with_generation_seed(42)
3428        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3429        .with_structured_strategy(InjectionStrategy::SelfApproval {
3430            user_id: "USER001".to_string(),
3431        })
3432        .with_scenario("scenario-001")
3433        .with_original_document_hash("abc123");
3434
3435        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3436        assert_eq!(anomaly.generation_seed, Some(42));
3437        assert!(anomaly.causal_reason.is_some());
3438        assert!(anomaly.structured_strategy.is_some());
3439        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3440        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3441
3442        // Check that legacy injection_strategy is also set
3443        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3444    }
3445
3446    #[test]
3447    fn test_labeled_anomaly_derivation_chain() {
3448        let parent = LabeledAnomaly::new(
3449            "ANO001".to_string(),
3450            AnomalyType::Fraud(FraudType::DuplicatePayment),
3451            "JE001".to_string(),
3452            "JE".to_string(),
3453            "1000".to_string(),
3454            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3455        );
3456
3457        let child = LabeledAnomaly::new(
3458            "ANO002".to_string(),
3459            AnomalyType::Error(ErrorType::DuplicateEntry),
3460            "JE002".to_string(),
3461            "JE".to_string(),
3462            "1000".to_string(),
3463            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3464        )
3465        .with_parent_anomaly(&parent.anomaly_id);
3466
3467        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3468    }
3469
3470    #[test]
3471    fn test_injection_strategy_description() {
3472        let strategy = InjectionStrategy::AmountManipulation {
3473            original: dec!(1000),
3474            factor: 2.5,
3475        };
3476        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3477        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3478
3479        let strategy = InjectionStrategy::ThresholdAvoidance {
3480            threshold: dec!(10000),
3481            adjusted_amount: dec!(9999),
3482        };
3483        assert_eq!(
3484            strategy.description(),
3485            "Amount adjusted to avoid 10000 threshold"
3486        );
3487
3488        let strategy = InjectionStrategy::DateShift {
3489            days_shifted: -5,
3490            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3491        };
3492        assert_eq!(strategy.description(), "Date backdated by 5 days");
3493
3494        let strategy = InjectionStrategy::DateShift {
3495            days_shifted: 3,
3496            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3497        };
3498        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3499    }
3500
3501    #[test]
3502    fn test_causal_reason_variants() {
3503        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3504        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3505            assert!((base_rate - 0.02).abs() < 0.001);
3506        }
3507
3508        let reason = AnomalyCausalReason::TemporalPattern {
3509            pattern_name: "year_end_spike".to_string(),
3510        };
3511        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3512            assert_eq!(pattern_name, "year_end_spike");
3513        }
3514
3515        let reason = AnomalyCausalReason::ScenarioStep {
3516            scenario_type: "kickback".to_string(),
3517            step_number: 3,
3518        };
3519        if let AnomalyCausalReason::ScenarioStep {
3520            scenario_type,
3521            step_number,
3522        } = reason
3523        {
3524            assert_eq!(scenario_type, "kickback");
3525            assert_eq!(step_number, 3);
3526        }
3527    }
3528
3529    #[test]
3530    fn test_feature_vector_length() {
3531        let anomaly = LabeledAnomaly::new(
3532            "ANO001".to_string(),
3533            AnomalyType::Fraud(FraudType::SelfApproval),
3534            "JE001".to_string(),
3535            "JE".to_string(),
3536            "1000".to_string(),
3537            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3538        );
3539
3540        let features = anomaly.to_features();
3541        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3542        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3543    }
3544
3545    #[test]
3546    fn test_feature_vector_with_provenance() {
3547        let anomaly = LabeledAnomaly::new(
3548            "ANO001".to_string(),
3549            AnomalyType::Fraud(FraudType::SelfApproval),
3550            "JE001".to_string(),
3551            "JE".to_string(),
3552            "1000".to_string(),
3553            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3554        )
3555        .with_scenario("scenario-001")
3556        .with_parent_anomaly("ANO000");
3557
3558        let features = anomaly.to_features();
3559
3560        // Last two features should be 1.0 (has scenario, has parent)
3561        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3562        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3563    }
3564
3565    #[test]
3566    fn test_anomaly_summary() {
3567        let anomalies = vec![
3568            LabeledAnomaly::new(
3569                "ANO001".to_string(),
3570                AnomalyType::Fraud(FraudType::SelfApproval),
3571                "JE001".to_string(),
3572                "JE".to_string(),
3573                "1000".to_string(),
3574                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3575            ),
3576            LabeledAnomaly::new(
3577                "ANO002".to_string(),
3578                AnomalyType::Error(ErrorType::DuplicateEntry),
3579                "JE002".to_string(),
3580                "JE".to_string(),
3581                "1000".to_string(),
3582                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3583            ),
3584        ];
3585
3586        let summary = AnomalySummary::from_anomalies(&anomalies);
3587
3588        assert_eq!(summary.total_count, 2);
3589        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3590        assert_eq!(summary.by_category.get("Error"), Some(&1));
3591    }
3592
3593    #[test]
3594    fn test_rate_config_validation() {
3595        let config = AnomalyRateConfig::default();
3596        assert!(config.validate().is_ok());
3597
3598        let bad_config = AnomalyRateConfig {
3599            fraud_rate: 0.5,
3600            error_rate: 0.5,
3601            process_issue_rate: 0.5, // Sum > 1.0
3602            ..Default::default()
3603        };
3604        assert!(bad_config.validate().is_err());
3605    }
3606
3607    #[test]
3608    fn test_injection_strategy_serialization() {
3609        let strategy = InjectionStrategy::SoDViolation {
3610            duty1: "CreatePO".to_string(),
3611            duty2: "ApprovePO".to_string(),
3612            violating_user: "USER001".to_string(),
3613        };
3614
3615        let json = serde_json::to_string(&strategy).unwrap();
3616        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3617
3618        assert_eq!(strategy, deserialized);
3619    }
3620
3621    #[test]
3622    fn test_labeled_anomaly_serialization_with_provenance() {
3623        let anomaly = LabeledAnomaly::new(
3624            "ANO001".to_string(),
3625            AnomalyType::Fraud(FraudType::SelfApproval),
3626            "JE001".to_string(),
3627            "JE".to_string(),
3628            "1000".to_string(),
3629            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3630        )
3631        .with_run_id("run-123")
3632        .with_generation_seed(42)
3633        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3634
3635        let json = serde_json::to_string(&anomaly).unwrap();
3636        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3637
3638        assert_eq!(anomaly.run_id, deserialized.run_id);
3639        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3640    }
3641
3642    // ========================================
3643    // FR-003 ENHANCED TAXONOMY TESTS
3644    // ========================================
3645
3646    #[test]
3647    fn test_anomaly_category_from_anomaly_type() {
3648        // Fraud mappings
3649        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3650        assert_eq!(
3651            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3652            AnomalyCategory::FictitiousVendor
3653        );
3654
3655        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3656        assert_eq!(
3657            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3658            AnomalyCategory::VendorKickback
3659        );
3660
3661        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3662        assert_eq!(
3663            AnomalyCategory::from_anomaly_type(&fraud_structured),
3664            AnomalyCategory::StructuredTransaction
3665        );
3666
3667        // Error mappings
3668        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3669        assert_eq!(
3670            AnomalyCategory::from_anomaly_type(&error_duplicate),
3671            AnomalyCategory::DuplicatePayment
3672        );
3673
3674        // Process issue mappings
3675        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3676        assert_eq!(
3677            AnomalyCategory::from_anomaly_type(&process_skip),
3678            AnomalyCategory::MissingApproval
3679        );
3680
3681        // Relational mappings
3682        let relational_circular =
3683            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3684        assert_eq!(
3685            AnomalyCategory::from_anomaly_type(&relational_circular),
3686            AnomalyCategory::CircularFlow
3687        );
3688    }
3689
3690    #[test]
3691    fn test_anomaly_category_ordinal() {
3692        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3693        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3694        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3695    }
3696
3697    #[test]
3698    fn test_contributing_factor() {
3699        let factor = ContributingFactor::new(
3700            FactorType::AmountDeviation,
3701            15000.0,
3702            10000.0,
3703            true,
3704            0.5,
3705            "Amount exceeds threshold",
3706        );
3707
3708        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3709        assert_eq!(factor.value, 15000.0);
3710        assert_eq!(factor.threshold, 10000.0);
3711        assert!(factor.direction_greater);
3712
3713        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3714        let contribution = factor.contribution();
3715        assert!((contribution - 0.25).abs() < 0.01);
3716    }
3717
3718    #[test]
3719    fn test_contributing_factor_with_evidence() {
3720        let mut data = HashMap::new();
3721        data.insert("expected".to_string(), "10000".to_string());
3722        data.insert("actual".to_string(), "15000".to_string());
3723
3724        let factor = ContributingFactor::new(
3725            FactorType::AmountDeviation,
3726            15000.0,
3727            10000.0,
3728            true,
3729            0.5,
3730            "Amount deviation detected",
3731        )
3732        .with_evidence("transaction_history", data);
3733
3734        assert!(factor.evidence.is_some());
3735        let evidence = factor.evidence.unwrap();
3736        assert_eq!(evidence.source, "transaction_history");
3737        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3738    }
3739
3740    #[test]
3741    fn test_enhanced_anomaly_label() {
3742        let base = LabeledAnomaly::new(
3743            "ANO001".to_string(),
3744            AnomalyType::Fraud(FraudType::DuplicatePayment),
3745            "JE001".to_string(),
3746            "JE".to_string(),
3747            "1000".to_string(),
3748            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3749        );
3750
3751        let enhanced = EnhancedAnomalyLabel::from_base(base)
3752            .with_confidence(0.85)
3753            .with_severity(0.7)
3754            .with_factor(ContributingFactor::new(
3755                FactorType::DuplicateIndicator,
3756                1.0,
3757                0.5,
3758                true,
3759                0.4,
3760                "Duplicate payment detected",
3761            ))
3762            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3763
3764        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3765        assert_eq!(enhanced.enhanced_confidence, 0.85);
3766        assert_eq!(enhanced.enhanced_severity, 0.7);
3767        assert_eq!(enhanced.contributing_factors.len(), 1);
3768        assert_eq!(enhanced.secondary_categories.len(), 1);
3769    }
3770
3771    #[test]
3772    fn test_enhanced_anomaly_label_features() {
3773        let base = LabeledAnomaly::new(
3774            "ANO001".to_string(),
3775            AnomalyType::Fraud(FraudType::SelfApproval),
3776            "JE001".to_string(),
3777            "JE".to_string(),
3778            "1000".to_string(),
3779            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3780        );
3781
3782        let enhanced = EnhancedAnomalyLabel::from_base(base)
3783            .with_confidence(0.9)
3784            .with_severity(0.8)
3785            .with_factor(ContributingFactor::new(
3786                FactorType::ControlBypass,
3787                1.0,
3788                0.0,
3789                true,
3790                0.5,
3791                "Control bypass detected",
3792            ));
3793
3794        let features = enhanced.to_features();
3795
3796        // Should have 25 features (15 base + 10 enhanced)
3797        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3798        assert_eq!(features.len(), 25);
3799
3800        // Check enhanced confidence is in features
3801        assert_eq!(features[15], 0.9); // enhanced_confidence
3802
3803        // Check has_control_bypass flag
3804        assert_eq!(features[21], 1.0); // has_control_bypass
3805    }
3806
3807    #[test]
3808    fn test_enhanced_anomaly_label_feature_names() {
3809        let names = EnhancedAnomalyLabel::feature_names();
3810        assert_eq!(names.len(), 25);
3811        assert!(names.contains(&"enhanced_confidence"));
3812        assert!(names.contains(&"enhanced_severity"));
3813        assert!(names.contains(&"has_control_bypass"));
3814    }
3815
3816    #[test]
3817    fn test_factor_type_names() {
3818        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3819        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3820        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3821    }
3822
3823    #[test]
3824    fn test_anomaly_category_serialization() {
3825        let category = AnomalyCategory::CircularFlow;
3826        let json = serde_json::to_string(&category).unwrap();
3827        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3828        assert_eq!(category, deserialized);
3829
3830        let custom = AnomalyCategory::Custom("custom_type".to_string());
3831        let json = serde_json::to_string(&custom).unwrap();
3832        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3833        assert_eq!(custom, deserialized);
3834    }
3835
3836    #[test]
3837    fn test_enhanced_label_secondary_category_dedup() {
3838        let base = LabeledAnomaly::new(
3839            "ANO001".to_string(),
3840            AnomalyType::Fraud(FraudType::DuplicatePayment),
3841            "JE001".to_string(),
3842            "JE".to_string(),
3843            "1000".to_string(),
3844            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3845        );
3846
3847        let enhanced = EnhancedAnomalyLabel::from_base(base)
3848            // Try to add the primary category as secondary (should be ignored)
3849            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3850            // Add a valid secondary
3851            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3852            // Try to add duplicate secondary (should be ignored)
3853            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3854
3855        // Should only have 1 secondary category (TimingAnomaly)
3856        assert_eq!(enhanced.secondary_categories.len(), 1);
3857        assert_eq!(
3858            enhanced.secondary_categories[0],
3859            AnomalyCategory::TimingAnomaly
3860        );
3861    }
3862
3863    // ==========================================================================
3864    // Accounting Standards Fraud Type Tests
3865    // ==========================================================================
3866
3867    #[test]
3868    fn test_revenue_recognition_fraud_types() {
3869        // Test ASC 606/IFRS 15 related fraud types
3870        let fraud_types = [
3871            FraudType::ImproperRevenueRecognition,
3872            FraudType::ImproperPoAllocation,
3873            FraudType::VariableConsiderationManipulation,
3874            FraudType::ContractModificationMisstatement,
3875        ];
3876
3877        for fraud_type in fraud_types {
3878            let anomaly_type = AnomalyType::Fraud(fraud_type);
3879            assert_eq!(anomaly_type.category(), "Fraud");
3880            assert!(anomaly_type.is_intentional());
3881            assert!(anomaly_type.severity() >= 3);
3882        }
3883    }
3884
3885    #[test]
3886    fn test_lease_accounting_fraud_types() {
3887        // Test ASC 842/IFRS 16 related fraud types
3888        let fraud_types = [
3889            FraudType::LeaseClassificationManipulation,
3890            FraudType::OffBalanceSheetLease,
3891            FraudType::LeaseLiabilityUnderstatement,
3892            FraudType::RouAssetMisstatement,
3893        ];
3894
3895        for fraud_type in fraud_types {
3896            let anomaly_type = AnomalyType::Fraud(fraud_type);
3897            assert_eq!(anomaly_type.category(), "Fraud");
3898            assert!(anomaly_type.is_intentional());
3899            assert!(anomaly_type.severity() >= 3);
3900        }
3901
3902        // Off-balance sheet lease fraud should be high severity
3903        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3904    }
3905
3906    #[test]
3907    fn test_fair_value_fraud_types() {
3908        // Test ASC 820/IFRS 13 related fraud types
3909        let fraud_types = [
3910            FraudType::FairValueHierarchyManipulation,
3911            FraudType::Level3InputManipulation,
3912            FraudType::ValuationTechniqueManipulation,
3913        ];
3914
3915        for fraud_type in fraud_types {
3916            let anomaly_type = AnomalyType::Fraud(fraud_type);
3917            assert_eq!(anomaly_type.category(), "Fraud");
3918            assert!(anomaly_type.is_intentional());
3919            assert!(anomaly_type.severity() >= 4);
3920        }
3921
3922        // Level 3 manipulation is highest severity (unobservable inputs)
3923        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3924    }
3925
3926    #[test]
3927    fn test_impairment_fraud_types() {
3928        // Test ASC 360/IAS 36 related fraud types
3929        let fraud_types = [
3930            FraudType::DelayedImpairment,
3931            FraudType::ImpairmentTestAvoidance,
3932            FraudType::CashFlowProjectionManipulation,
3933            FraudType::ImproperImpairmentReversal,
3934        ];
3935
3936        for fraud_type in fraud_types {
3937            let anomaly_type = AnomalyType::Fraud(fraud_type);
3938            assert_eq!(anomaly_type.category(), "Fraud");
3939            assert!(anomaly_type.is_intentional());
3940            assert!(anomaly_type.severity() >= 3);
3941        }
3942
3943        // Cash flow manipulation has highest severity
3944        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3945    }
3946
3947    // ==========================================================================
3948    // Accounting Standards Error Type Tests
3949    // ==========================================================================
3950
3951    #[test]
3952    fn test_standards_error_types() {
3953        // Test non-fraudulent accounting standards errors
3954        let error_types = [
3955            ErrorType::RevenueTimingError,
3956            ErrorType::PoAllocationError,
3957            ErrorType::LeaseClassificationError,
3958            ErrorType::LeaseCalculationError,
3959            ErrorType::FairValueError,
3960            ErrorType::ImpairmentCalculationError,
3961            ErrorType::DiscountRateError,
3962            ErrorType::FrameworkApplicationError,
3963        ];
3964
3965        for error_type in error_types {
3966            let anomaly_type = AnomalyType::Error(error_type);
3967            assert_eq!(anomaly_type.category(), "Error");
3968            assert!(!anomaly_type.is_intentional());
3969            assert!(anomaly_type.severity() >= 3);
3970        }
3971    }
3972
3973    #[test]
3974    fn test_framework_application_error() {
3975        // Test IFRS vs GAAP confusion errors
3976        let error_type = ErrorType::FrameworkApplicationError;
3977        assert_eq!(error_type.severity(), 4);
3978
3979        let anomaly = LabeledAnomaly::new(
3980            "ERR001".to_string(),
3981            AnomalyType::Error(error_type),
3982            "JE100".to_string(),
3983            "JE".to_string(),
3984            "1000".to_string(),
3985            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3986        )
3987        .with_description("LIFO inventory method used under IFRS (not permitted)")
3988        .with_metadata("framework", "IFRS")
3989        .with_metadata("standard_violated", "IAS 2");
3990
3991        assert_eq!(anomaly.anomaly_type.category(), "Error");
3992        assert_eq!(
3993            anomaly.metadata.get("standard_violated"),
3994            Some(&"IAS 2".to_string())
3995        );
3996    }
3997
3998    #[test]
3999    fn test_standards_anomaly_serialization() {
4000        // Test that new fraud types serialize/deserialize correctly
4001        let fraud_types = [
4002            FraudType::ImproperRevenueRecognition,
4003            FraudType::LeaseClassificationManipulation,
4004            FraudType::FairValueHierarchyManipulation,
4005            FraudType::DelayedImpairment,
4006        ];
4007
4008        for fraud_type in fraud_types {
4009            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4010            let deserialized: FraudType =
4011                serde_json::from_str(&json).expect("Failed to deserialize");
4012            assert_eq!(fraud_type, deserialized);
4013        }
4014
4015        // Test error types
4016        let error_types = [
4017            ErrorType::RevenueTimingError,
4018            ErrorType::LeaseCalculationError,
4019            ErrorType::FairValueError,
4020            ErrorType::FrameworkApplicationError,
4021        ];
4022
4023        for error_type in error_types {
4024            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4025            let deserialized: ErrorType =
4026                serde_json::from_str(&json).expect("Failed to deserialize");
4027            assert_eq!(error_type, deserialized);
4028        }
4029    }
4030
4031    #[test]
4032    fn test_standards_labeled_anomaly() {
4033        // Test creating a labeled anomaly for a standards violation
4034        let anomaly = LabeledAnomaly::new(
4035            "STD001".to_string(),
4036            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4037            "CONTRACT-2024-001".to_string(),
4038            "Revenue".to_string(),
4039            "1000".to_string(),
4040            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4041        )
4042        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4043        .with_monetary_impact(dec!(500000))
4044        .with_metadata("standard", "ASC 606")
4045        .with_metadata("paragraph", "606-10-25-1")
4046        .with_metadata("contract_id", "C-2024-001")
4047        .with_related_entity("CONTRACT-2024-001")
4048        .with_related_entity("CUSTOMER-500");
4049
4050        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4051        assert!(anomaly.is_injected);
4052        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4053        assert_eq!(anomaly.related_entities.len(), 2);
4054        assert_eq!(
4055            anomaly.metadata.get("standard"),
4056            Some(&"ASC 606".to_string())
4057        );
4058    }
4059
4060    // ==========================================================================
4061    // Multi-Dimensional Labeling Tests
4062    // ==========================================================================
4063
4064    #[test]
4065    fn test_severity_level() {
4066        assert_eq!(SeverityLevel::Low.numeric(), 1);
4067        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4068
4069        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4070        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4071
4072        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4073        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4074
4075        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4076    }
4077
4078    #[test]
4079    fn test_anomaly_severity() {
4080        let severity =
4081            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4082
4083        assert_eq!(severity.level, SeverityLevel::High);
4084        assert!(severity.is_material);
4085        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4086
4087        // Not material
4088        let low_severity =
4089            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4090        assert!(!low_severity.is_material);
4091    }
4092
4093    #[test]
4094    fn test_detection_difficulty() {
4095        assert!(
4096            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4097        );
4098        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4099
4100        assert_eq!(
4101            AnomalyDetectionDifficulty::from_score(0.05),
4102            AnomalyDetectionDifficulty::Trivial
4103        );
4104        assert_eq!(
4105            AnomalyDetectionDifficulty::from_score(0.90),
4106            AnomalyDetectionDifficulty::Expert
4107        );
4108
4109        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4110    }
4111
4112    #[test]
4113    fn test_ground_truth_certainty() {
4114        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4115        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4116        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4117    }
4118
4119    #[test]
4120    fn test_detection_method() {
4121        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4122        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4123    }
4124
4125    #[test]
4126    fn test_extended_anomaly_label() {
4127        let base = LabeledAnomaly::new(
4128            "ANO001".to_string(),
4129            AnomalyType::Fraud(FraudType::FictitiousVendor),
4130            "JE001".to_string(),
4131            "JE".to_string(),
4132            "1000".to_string(),
4133            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4134        )
4135        .with_monetary_impact(dec!(100000));
4136
4137        let extended = ExtendedAnomalyLabel::from_base(base)
4138            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4139            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4140            .with_method(DetectionMethod::GraphBased)
4141            .with_method(DetectionMethod::ForensicAudit)
4142            .with_indicator("New vendor with no history")
4143            .with_indicator("Large first transaction")
4144            .with_certainty(GroundTruthCertainty::Definite)
4145            .with_entity("V001")
4146            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4147            .with_scheme("SCHEME001", 2);
4148
4149        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4150        assert_eq!(
4151            extended.detection_difficulty,
4152            AnomalyDetectionDifficulty::Hard
4153        );
4154        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4155        assert_eq!(extended.recommended_methods.len(), 3);
4156        assert_eq!(extended.key_indicators.len(), 2);
4157        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4158        assert_eq!(extended.scheme_stage, Some(2));
4159    }
4160
4161    #[test]
4162    fn test_extended_anomaly_label_features() {
4163        let base = LabeledAnomaly::new(
4164            "ANO001".to_string(),
4165            AnomalyType::Fraud(FraudType::SelfApproval),
4166            "JE001".to_string(),
4167            "JE".to_string(),
4168            "1000".to_string(),
4169            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4170        );
4171
4172        let extended =
4173            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4174
4175        let features = extended.to_features();
4176        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4177        assert_eq!(features.len(), 30);
4178
4179        // Check difficulty score is in features
4180        let difficulty_idx = 18; // Position of difficulty_score
4181        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4182    }
4183
4184    #[test]
4185    fn test_extended_label_near_miss() {
4186        let base = LabeledAnomaly::new(
4187            "ANO001".to_string(),
4188            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4189            "JE001".to_string(),
4190            "JE".to_string(),
4191            "1000".to_string(),
4192            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4193        );
4194
4195        let extended = ExtendedAnomalyLabel::from_base(base)
4196            .as_near_miss("Year-end bonus payment, legitimately high");
4197
4198        assert!(extended.is_near_miss);
4199        assert!(extended.near_miss_explanation.is_some());
4200    }
4201
4202    #[test]
4203    fn test_scheme_type() {
4204        assert_eq!(
4205            SchemeType::GradualEmbezzlement.name(),
4206            "gradual_embezzlement"
4207        );
4208        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4209        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4210    }
4211
4212    #[test]
4213    fn test_concealment_technique() {
4214        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4215        assert!(
4216            ConcealmentTechnique::Collusion.difficulty_bonus()
4217                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4218        );
4219    }
4220
4221    #[test]
4222    fn test_near_miss_label() {
4223        let near_miss = NearMissLabel::new(
4224            "JE001",
4225            NearMissPattern::ThresholdProximity {
4226                threshold: dec!(10000),
4227                proximity: 0.95,
4228            },
4229            0.7,
4230            FalsePositiveTrigger::AmountNearThreshold,
4231            "Transaction is 95% of threshold but business justified",
4232        );
4233
4234        assert_eq!(near_miss.document_id, "JE001");
4235        assert_eq!(near_miss.suspicion_score, 0.7);
4236        assert_eq!(
4237            near_miss.false_positive_trigger,
4238            FalsePositiveTrigger::AmountNearThreshold
4239        );
4240    }
4241
4242    #[test]
4243    fn test_legitimate_pattern_type() {
4244        assert_eq!(
4245            LegitimatePatternType::YearEndBonus.description(),
4246            "Year-end bonus payment"
4247        );
4248        assert_eq!(
4249            LegitimatePatternType::InsuranceClaim.description(),
4250            "Insurance claim reimbursement"
4251        );
4252    }
4253
4254    #[test]
4255    fn test_severity_detection_difficulty_serialization() {
4256        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4257        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4258        let deserialized: AnomalySeverity =
4259            serde_json::from_str(&json).expect("Failed to deserialize");
4260        assert_eq!(severity.level, deserialized.level);
4261
4262        let difficulty = AnomalyDetectionDifficulty::Hard;
4263        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4264        let deserialized: AnomalyDetectionDifficulty =
4265            serde_json::from_str(&json).expect("Failed to deserialize");
4266        assert_eq!(difficulty, deserialized);
4267    }
4268
4269    // ========================================
4270    // ACFE Taxonomy Tests
4271    // ========================================
4272
4273    #[test]
4274    fn test_acfe_fraud_category() {
4275        let asset = AcfeFraudCategory::AssetMisappropriation;
4276        assert_eq!(asset.name(), "asset_misappropriation");
4277        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4278        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4279        assert_eq!(asset.typical_detection_months(), 12);
4280
4281        let corruption = AcfeFraudCategory::Corruption;
4282        assert_eq!(corruption.name(), "corruption");
4283        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4284
4285        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4286        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4287        assert_eq!(fs_fraud.typical_detection_months(), 24);
4288    }
4289
4290    #[test]
4291    fn test_cash_fraud_scheme() {
4292        let shell = CashFraudScheme::ShellCompany;
4293        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4294        assert_eq!(shell.subcategory(), "billing_schemes");
4295        assert_eq!(shell.severity(), 5);
4296        assert_eq!(
4297            shell.detection_difficulty(),
4298            AnomalyDetectionDifficulty::Hard
4299        );
4300
4301        let ghost = CashFraudScheme::GhostEmployee;
4302        assert_eq!(ghost.subcategory(), "payroll_schemes");
4303        assert_eq!(ghost.severity(), 5);
4304
4305        // Test all variants exist
4306        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4307    }
4308
4309    #[test]
4310    fn test_asset_fraud_scheme() {
4311        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4312        assert_eq!(
4313            ip_theft.category(),
4314            AcfeFraudCategory::AssetMisappropriation
4315        );
4316        assert_eq!(ip_theft.subcategory(), "other_assets");
4317        assert_eq!(ip_theft.severity(), 5);
4318
4319        let inv_theft = AssetFraudScheme::InventoryTheft;
4320        assert_eq!(inv_theft.subcategory(), "inventory");
4321        assert_eq!(inv_theft.severity(), 4);
4322    }
4323
4324    #[test]
4325    fn test_corruption_scheme() {
4326        let kickback = CorruptionScheme::InvoiceKickback;
4327        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4328        assert_eq!(kickback.subcategory(), "bribery");
4329        assert_eq!(kickback.severity(), 5);
4330        assert_eq!(
4331            kickback.detection_difficulty(),
4332            AnomalyDetectionDifficulty::Expert
4333        );
4334
4335        let bid_rigging = CorruptionScheme::BidRigging;
4336        assert_eq!(bid_rigging.subcategory(), "bribery");
4337        assert_eq!(
4338            bid_rigging.detection_difficulty(),
4339            AnomalyDetectionDifficulty::Hard
4340        );
4341
4342        let purchasing = CorruptionScheme::PurchasingConflict;
4343        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4344
4345        // Test all variants exist
4346        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4347    }
4348
4349    #[test]
4350    fn test_financial_statement_scheme() {
4351        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4352        assert_eq!(
4353            fictitious.category(),
4354            AcfeFraudCategory::FinancialStatementFraud
4355        );
4356        assert_eq!(fictitious.subcategory(), "overstatement");
4357        assert_eq!(fictitious.severity(), 5);
4358        assert_eq!(
4359            fictitious.detection_difficulty(),
4360            AnomalyDetectionDifficulty::Expert
4361        );
4362
4363        let understated = FinancialStatementScheme::UnderstatedRevenues;
4364        assert_eq!(understated.subcategory(), "understatement");
4365
4366        // Test all variants exist
4367        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4368    }
4369
4370    #[test]
4371    fn test_acfe_scheme_unified() {
4372        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4373        assert_eq!(
4374            cash_scheme.category(),
4375            AcfeFraudCategory::AssetMisappropriation
4376        );
4377        assert_eq!(cash_scheme.severity(), 5);
4378
4379        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4380        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4381
4382        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4383        assert_eq!(
4384            fs_scheme.category(),
4385            AcfeFraudCategory::FinancialStatementFraud
4386        );
4387    }
4388
4389    #[test]
4390    fn test_acfe_detection_method() {
4391        let tip = AcfeDetectionMethod::Tip;
4392        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4393
4394        let internal_audit = AcfeDetectionMethod::InternalAudit;
4395        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4396
4397        let external_audit = AcfeDetectionMethod::ExternalAudit;
4398        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4399
4400        // Test all variants exist
4401        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4402    }
4403
4404    #[test]
4405    fn test_perpetrator_department() {
4406        let accounting = PerpetratorDepartment::Accounting;
4407        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4408        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4409
4410        let executive = PerpetratorDepartment::Executive;
4411        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4412    }
4413
4414    #[test]
4415    fn test_perpetrator_level() {
4416        let employee = PerpetratorLevel::Employee;
4417        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4418        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4419
4420        let exec = PerpetratorLevel::OwnerExecutive;
4421        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4422    }
4423
4424    #[test]
4425    fn test_acfe_calibration() {
4426        let cal = AcfeCalibration::default();
4427        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4428        assert_eq!(cal.median_duration_months, 12);
4429        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4430        assert!(cal.validate().is_ok());
4431
4432        // Test custom calibration
4433        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4434        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4435        assert_eq!(custom_cal.median_duration_months, 18);
4436
4437        // Test validation failure
4438        let bad_cal = AcfeCalibration {
4439            collusion_rate: 1.5,
4440            ..Default::default()
4441        };
4442        assert!(bad_cal.validate().is_err());
4443    }
4444
4445    #[test]
4446    fn test_fraud_triangle() {
4447        let triangle = FraudTriangle::new(
4448            PressureType::FinancialTargets,
4449            vec![
4450                OpportunityFactor::WeakInternalControls,
4451                OpportunityFactor::ManagementOverride,
4452            ],
4453            Rationalization::ForTheCompanyGood,
4454        );
4455
4456        // Risk score should be between 0 and 1
4457        let risk = triangle.risk_score();
4458        assert!((0.0..=1.0).contains(&risk));
4459        // Should be relatively high given the components
4460        assert!(risk > 0.5);
4461    }
4462
4463    #[test]
4464    fn test_pressure_types() {
4465        let financial = PressureType::FinancialTargets;
4466        assert!(financial.risk_weight() > 0.5);
4467
4468        let gambling = PressureType::GamblingAddiction;
4469        assert_eq!(gambling.risk_weight(), 0.90);
4470    }
4471
4472    #[test]
4473    fn test_opportunity_factors() {
4474        let override_factor = OpportunityFactor::ManagementOverride;
4475        assert_eq!(override_factor.risk_weight(), 0.90);
4476
4477        let weak_controls = OpportunityFactor::WeakInternalControls;
4478        assert!(weak_controls.risk_weight() > 0.8);
4479    }
4480
4481    #[test]
4482    fn test_rationalizations() {
4483        let entitlement = Rationalization::Entitlement;
4484        assert!(entitlement.risk_weight() > 0.8);
4485
4486        let borrowing = Rationalization::TemporaryBorrowing;
4487        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4488    }
4489
4490    #[test]
4491    fn test_acfe_scheme_serialization() {
4492        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4493        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4494        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4495        assert_eq!(scheme, deserialized);
4496
4497        let calibration = AcfeCalibration::default();
4498        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4499        let deserialized: AcfeCalibration =
4500            serde_json::from_str(&json).expect("Failed to deserialize");
4501        assert_eq!(calibration.median_loss, deserialized.median_loss);
4502    }
4503}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs