datasynth_core/models/
anomaly.rs

//! Anomaly types and labels for synthetic data generation.
//!
//! This module provides comprehensive anomaly classification for:
//! - Fraud detection training
//! - Error detection systems
//! - Process compliance monitoring
//! - Statistical anomaly detection
//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15/// Causal reason explaining why an anomaly was injected.
16///
17/// This enables provenance tracking for understanding the "why" behind each anomaly.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20    /// Injected due to random rate selection.
21    RandomRate {
22        /// Base rate used for selection.
23        base_rate: f64,
24    },
25    /// Injected due to temporal pattern matching.
26    TemporalPattern {
27        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
28        pattern_name: String,
29    },
30    /// Injected based on entity targeting rules.
31    EntityTargeting {
32        /// Type of entity targeted (e.g., "vendor", "user", "account").
33        target_type: String,
34        /// ID of the targeted entity.
35        target_id: String,
36    },
37    /// Part of an anomaly cluster.
38    ClusterMembership {
39        /// ID of the cluster this anomaly belongs to.
40        cluster_id: String,
41    },
42    /// Part of a multi-step scenario.
43    ScenarioStep {
44        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
45        scenario_type: String,
46        /// Step number within the scenario.
47        step_number: u32,
48    },
49    /// Injected based on data quality profile.
50    DataQualityProfile {
51        /// Profile name (e.g., "noisy", "legacy", "clean").
52        profile: String,
53    },
54    /// Injected for ML training balance.
55    MLTrainingBalance {
56        /// Target class being balanced.
57        target_class: String,
58    },
59}
60
61/// Structured injection strategy with captured parameters.
62///
63/// Unlike the string-based `injection_strategy` field, this enum captures
64/// the exact parameters used during injection for full reproducibility.
65#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67    /// Amount was manipulated by a factor.
68    AmountManipulation {
69        /// Original amount before manipulation.
70        original: Decimal,
71        /// Multiplication factor applied.
72        factor: f64,
73    },
74    /// Amount adjusted to avoid a threshold.
75    ThresholdAvoidance {
76        /// Threshold being avoided.
77        threshold: Decimal,
78        /// Final amount after adjustment.
79        adjusted_amount: Decimal,
80    },
81    /// Date was backdated or forward-dated.
82    DateShift {
83        /// Number of days shifted (negative = backdated).
84        days_shifted: i32,
85        /// Original date before shift.
86        original_date: NaiveDate,
87    },
88    /// User approved their own transaction.
89    SelfApproval {
90        /// User who created and approved.
91        user_id: String,
92    },
93    /// Segregation of duties violation.
94    SoDViolation {
95        /// First duty involved.
96        duty1: String,
97        /// Second duty involved.
98        duty2: String,
99        /// User who performed both duties.
100        violating_user: String,
101    },
102    /// Exact duplicate of another document.
103    ExactDuplicate {
104        /// ID of the original document.
105        original_doc_id: String,
106    },
107    /// Near-duplicate with small variations.
108    NearDuplicate {
109        /// ID of the original document.
110        original_doc_id: String,
111        /// Fields that were varied.
112        varied_fields: Vec<String>,
113    },
114    /// Circular flow of funds/goods.
115    CircularFlow {
116        /// Chain of entities involved.
117        entity_chain: Vec<String>,
118    },
119    /// Split transaction to avoid threshold.
120    SplitTransaction {
121        /// Original total amount.
122        original_amount: Decimal,
123        /// Number of splits.
124        split_count: u32,
125        /// IDs of the split documents.
126        split_doc_ids: Vec<String>,
127    },
128    /// Round number manipulation.
129    RoundNumbering {
130        /// Original precise amount.
131        original_amount: Decimal,
132        /// Rounded amount.
133        rounded_amount: Decimal,
134    },
135    /// Timing manipulation (weekend, after-hours, etc.).
136    TimingManipulation {
137        /// Type of timing issue.
138        timing_type: String,
139        /// Original timestamp.
140        original_time: Option<NaiveDateTime>,
141    },
142    /// Account misclassification.
143    AccountMisclassification {
144        /// Correct account.
145        correct_account: String,
146        /// Incorrect account used.
147        incorrect_account: String,
148    },
149    /// Missing required field.
150    MissingField {
151        /// Name of the missing field.
152        field_name: String,
153    },
154    /// Custom injection strategy.
155    Custom {
156        /// Strategy name.
157        name: String,
158        /// Additional parameters.
159        parameters: HashMap<String, String>,
160    },
161}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
239/// Primary anomaly classification.
240#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
241pub enum AnomalyType {
242    /// Fraudulent activity.
243    Fraud(FraudType),
244    /// Data entry or processing error.
245    Error(ErrorType),
246    /// Process or control issue.
247    ProcessIssue(ProcessIssueType),
248    /// Statistical anomaly.
249    Statistical(StatisticalAnomalyType),
250    /// Relational/graph anomaly.
251    Relational(RelationalAnomalyType),
252    /// Custom anomaly type.
253    Custom(String),
254}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
299/// Fraud types for detection training.
300#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
301pub enum FraudType {
302    // Journal Entry Fraud
303    /// Fictitious journal entry with no business purpose.
304    FictitiousEntry,
305    /// Fictitious transaction (alias for FictitiousEntry).
306    FictitiousTransaction,
307    /// Round-dollar amounts suggesting manual manipulation.
308    RoundDollarManipulation,
309    /// Entry posted just below approval threshold.
310    JustBelowThreshold,
311    /// Revenue recognition manipulation.
312    RevenueManipulation,
313    /// Expense capitalization fraud.
314    ImproperCapitalization,
315    /// Improperly capitalizing expenses as assets.
316    ExpenseCapitalization,
317    /// Cookie jar reserves manipulation.
318    ReserveManipulation,
319    /// Round-tripping funds through suspense/clearing accounts.
320    SuspenseAccountAbuse,
321    /// Splitting transactions to stay below approval thresholds.
322    SplitTransaction,
323    /// Unusual timing (weekend, holiday, after-hours postings).
324    TimingAnomaly,
325    /// Posting to unauthorized accounts.
326    UnauthorizedAccess,
327
328    // Approval Fraud
329    /// User approving their own request.
330    SelfApproval,
331    /// Approval beyond authorized limit.
332    ExceededApprovalLimit,
333    /// Segregation of duties violation.
334    SegregationOfDutiesViolation,
335    /// Approval by unauthorized user.
336    UnauthorizedApproval,
337    /// Collusion between approver and requester.
338    CollusiveApproval,
339
340    // Vendor/Payment Fraud
341    /// Fictitious vendor.
342    FictitiousVendor,
343    /// Duplicate payment to vendor.
344    DuplicatePayment,
345    /// Payment to shell company.
346    ShellCompanyPayment,
347    /// Kickback scheme.
348    Kickback,
349    /// Kickback scheme (alias).
350    KickbackScheme,
351    /// Invoice manipulation.
352    InvoiceManipulation,
353
354    // Asset Fraud
355    /// Misappropriation of assets.
356    AssetMisappropriation,
357    /// Inventory theft.
358    InventoryTheft,
359    /// Ghost employee.
360    GhostEmployee,
361
362    // Financial Statement Fraud
363    /// Premature revenue recognition.
364    PrematureRevenue,
365    /// Understated liabilities.
366    UnderstatedLiabilities,
367    /// Overstated assets.
368    OverstatedAssets,
369    /// Channel stuffing.
370    ChannelStuffing,
371
372    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
373    /// Improper revenue recognition timing (ASC 606/IFRS 15).
374    ImproperRevenueRecognition,
375    /// Multiple performance obligations not properly separated.
376    ImproperPoAllocation,
377    /// Variable consideration not properly estimated.
378    VariableConsiderationManipulation,
379    /// Contract modifications not properly accounted for.
380    ContractModificationMisstatement,
381
382    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
383    /// Lease classification manipulation (operating vs finance).
384    LeaseClassificationManipulation,
385    /// Off-balance sheet lease fraud.
386    OffBalanceSheetLease,
387    /// Lease liability understatement.
388    LeaseLiabilityUnderstatement,
389    /// ROU asset misstatement.
390    RouAssetMisstatement,
391
392    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
393    /// Fair value hierarchy misclassification.
394    FairValueHierarchyManipulation,
395    /// Level 3 input manipulation.
396    Level3InputManipulation,
397    /// Valuation technique manipulation.
398    ValuationTechniqueManipulation,
399
400    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
401    /// Delayed impairment recognition.
402    DelayedImpairment,
403    /// Improperly avoiding impairment testing.
404    ImpairmentTestAvoidance,
405    /// Cash flow projection manipulation for impairment.
406    CashFlowProjectionManipulation,
407    /// Improper impairment reversal (IFRS only).
408    ImproperImpairmentReversal,
409
410    // Sourcing/Procurement Fraud (S2C)
411    /// Bid rigging or collusion among bidders.
412    BidRigging,
413    /// Contracts with phantom/shell vendors.
414    PhantomVendorContract,
415    /// Splitting contracts to avoid approval thresholds.
416    SplitContractThreshold,
417    /// Conflict of interest in sourcing decisions.
418    ConflictOfInterestSourcing,
419
420    // HR/Payroll Fraud (H2R)
421    /// Ghost employee on payroll.
422    GhostEmployeePayroll,
423    /// Payroll inflation/unauthorized raises.
424    PayrollInflation,
425    /// Duplicate expense report submission.
426    DuplicateExpenseReport,
427    /// Fictitious expense claims.
428    FictitiousExpense,
429    /// Splitting expenses to avoid approval threshold.
430    SplitExpenseToAvoidApproval,
431
432    // O2C Fraud
433    /// Revenue timing manipulation via quotes.
434    RevenueTimingManipulation,
435    /// Overriding quote prices without authorization.
436    QuotePriceOverride,
437}
438
439impl FraudType {
440    /// Returns severity level (1-5).
441    pub fn severity(&self) -> u8 {
442        match self {
443            FraudType::RoundDollarManipulation => 2,
444            FraudType::JustBelowThreshold => 3,
445            FraudType::SelfApproval => 3,
446            FraudType::ExceededApprovalLimit => 3,
447            FraudType::DuplicatePayment => 3,
448            FraudType::FictitiousEntry => 4,
449            FraudType::RevenueManipulation => 5,
450            FraudType::FictitiousVendor => 5,
451            FraudType::ShellCompanyPayment => 5,
452            FraudType::AssetMisappropriation => 5,
453            FraudType::SegregationOfDutiesViolation => 4,
454            FraudType::CollusiveApproval => 5,
455            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
456            FraudType::ImproperRevenueRecognition => 5,
457            FraudType::ImproperPoAllocation => 4,
458            FraudType::VariableConsiderationManipulation => 4,
459            FraudType::ContractModificationMisstatement => 3,
460            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
461            FraudType::LeaseClassificationManipulation => 4,
462            FraudType::OffBalanceSheetLease => 5,
463            FraudType::LeaseLiabilityUnderstatement => 4,
464            FraudType::RouAssetMisstatement => 3,
465            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
466            FraudType::FairValueHierarchyManipulation => 4,
467            FraudType::Level3InputManipulation => 5,
468            FraudType::ValuationTechniqueManipulation => 4,
469            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
470            FraudType::DelayedImpairment => 4,
471            FraudType::ImpairmentTestAvoidance => 4,
472            FraudType::CashFlowProjectionManipulation => 5,
473            FraudType::ImproperImpairmentReversal => 3,
474            _ => 4,
475        }
476    }
477}
478
479/// Error types for error detection.
480#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
481pub enum ErrorType {
482    // Data Entry Errors
483    /// Duplicate document entry.
484    DuplicateEntry,
485    /// Reversed debit/credit amounts.
486    ReversedAmount,
487    /// Transposed digits in amount.
488    TransposedDigits,
489    /// Wrong decimal placement.
490    DecimalError,
491    /// Missing required field.
492    MissingField,
493    /// Invalid account code.
494    InvalidAccount,
495
496    // Timing Errors
497    /// Posted to wrong period.
498    WrongPeriod,
499    /// Backdated entry.
500    BackdatedEntry,
501    /// Future-dated entry.
502    FutureDatedEntry,
503    /// Cutoff error.
504    CutoffError,
505
506    // Classification Errors
507    /// Wrong account classification.
508    MisclassifiedAccount,
509    /// Wrong cost center.
510    WrongCostCenter,
511    /// Wrong company code.
512    WrongCompanyCode,
513
514    // Calculation Errors
515    /// Unbalanced journal entry.
516    UnbalancedEntry,
517    /// Rounding error.
518    RoundingError,
519    /// Currency conversion error.
520    CurrencyError,
521    /// Tax calculation error.
522    TaxCalculationError,
523
524    // Accounting Standards Errors (Non-Fraudulent)
525    /// Wrong revenue recognition timing (honest mistake).
526    RevenueTimingError,
527    /// Performance obligation allocation error.
528    PoAllocationError,
529    /// Lease classification error (operating vs finance).
530    LeaseClassificationError,
531    /// Lease calculation error (PV, amortization).
532    LeaseCalculationError,
533    /// Fair value measurement error.
534    FairValueError,
535    /// Impairment calculation error.
536    ImpairmentCalculationError,
537    /// Discount rate error.
538    DiscountRateError,
539    /// Framework application error (IFRS vs GAAP).
540    FrameworkApplicationError,
541}
542
543impl ErrorType {
544    /// Returns severity level (1-5).
545    pub fn severity(&self) -> u8 {
546        match self {
547            ErrorType::RoundingError => 1,
548            ErrorType::MissingField => 2,
549            ErrorType::TransposedDigits => 2,
550            ErrorType::DecimalError => 3,
551            ErrorType::DuplicateEntry => 3,
552            ErrorType::ReversedAmount => 3,
553            ErrorType::WrongPeriod => 4,
554            ErrorType::UnbalancedEntry => 5,
555            ErrorType::CurrencyError => 4,
556            // Accounting Standards Errors
557            ErrorType::RevenueTimingError => 4,
558            ErrorType::PoAllocationError => 3,
559            ErrorType::LeaseClassificationError => 3,
560            ErrorType::LeaseCalculationError => 3,
561            ErrorType::FairValueError => 4,
562            ErrorType::ImpairmentCalculationError => 4,
563            ErrorType::DiscountRateError => 3,
564            ErrorType::FrameworkApplicationError => 4,
565            _ => 3,
566        }
567    }
568}
569
570/// Process issue types.
571#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
572pub enum ProcessIssueType {
573    // Approval Issues
574    /// Approval skipped entirely.
575    SkippedApproval,
576    /// Late approval (after posting).
577    LateApproval,
578    /// Missing supporting documentation.
579    MissingDocumentation,
580    /// Incomplete approval chain.
581    IncompleteApprovalChain,
582
583    // Timing Issues
584    /// Late posting.
585    LatePosting,
586    /// Posting outside business hours.
587    AfterHoursPosting,
588    /// Weekend/holiday posting.
589    WeekendPosting,
590    /// Rushed period-end posting.
591    RushedPeriodEnd,
592    /// Entry posted after the period-end close date (ISA 240.32).
593    /// Distinct from `RushedPeriodEnd` which flags pre-close volume spikes —
594    /// this variant specifically marks post-close adjustments.
595    PostClosePosting,
596
597    // Control Issues
598    /// Manual override of system control.
599    ManualOverride,
600    /// Unusual user access pattern.
601    UnusualAccess,
602    /// System bypass.
603    SystemBypass,
604    /// Batch processing anomaly.
605    BatchAnomaly,
606
607    // Documentation Issues
608    /// Vague or missing description.
609    VagueDescription,
610    /// Changed after posting.
611    PostFactoChange,
612    /// Incomplete audit trail.
613    IncompleteAuditTrail,
614
615    // Sourcing/Procurement Issues (S2C)
616    /// Purchasing outside of contracts (maverick spend).
617    MaverickSpend,
618    /// Purchasing against an expired contract.
619    ExpiredContractPurchase,
620    /// Overriding contracted price without authorization.
621    ContractPriceOverride,
622    /// Award given with only a single bid received.
623    SingleBidAward,
624    /// Bypassing supplier qualification requirements.
625    QualificationBypass,
626
627    // O2C Issues
628    /// Converting an expired quote to a sales order.
629    ExpiredQuoteConversion,
630}
631
632impl ProcessIssueType {
633    /// Returns severity level (1-5).
634    pub fn severity(&self) -> u8 {
635        match self {
636            ProcessIssueType::VagueDescription => 1,
637            ProcessIssueType::LatePosting => 2,
638            ProcessIssueType::AfterHoursPosting => 2,
639            ProcessIssueType::WeekendPosting => 2,
640            ProcessIssueType::PostClosePosting => 4,
641            ProcessIssueType::SkippedApproval => 4,
642            ProcessIssueType::ManualOverride => 4,
643            ProcessIssueType::SystemBypass => 5,
644            ProcessIssueType::IncompleteAuditTrail => 4,
645            _ => 3,
646        }
647    }
648}
649
650/// Statistical anomaly types.
651#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
652pub enum StatisticalAnomalyType {
653    // Amount Anomalies
654    /// Amount significantly above normal.
655    UnusuallyHighAmount,
656    /// Amount significantly below normal.
657    UnusuallyLowAmount,
658    /// Violates Benford's Law distribution.
659    BenfordViolation,
660    /// Exact duplicate amount (suspicious).
661    ExactDuplicateAmount,
662    /// Repeating pattern in amounts.
663    RepeatingAmount,
664
665    // Frequency Anomalies
666    /// Unusual transaction frequency.
667    UnusualFrequency,
668    /// Burst of transactions.
669    TransactionBurst,
670    /// Unusual time of day.
671    UnusualTiming,
672
673    // Trend Anomalies
674    /// Break in historical trend.
675    TrendBreak,
676    /// Sudden level shift.
677    LevelShift,
678    /// Seasonal pattern violation.
679    SeasonalAnomaly,
680
681    // Distribution Anomalies
682    /// Outlier in distribution.
683    StatisticalOutlier,
684    /// Change in variance.
685    VarianceChange,
686    /// Distribution shift.
687    DistributionShift,
688
689    // Sourcing/Contract Anomalies
690    /// Pattern of SLA breaches from a vendor.
691    SlaBreachPattern,
692    /// Contract with zero utilization.
693    UnusedContract,
694
695    // HR/Payroll Anomalies
696    /// Anomalous overtime patterns.
697    OvertimeAnomaly,
698}
699
700impl StatisticalAnomalyType {
701    /// Returns severity level (1-5).
702    pub fn severity(&self) -> u8 {
703        match self {
704            StatisticalAnomalyType::UnusualTiming => 1,
705            StatisticalAnomalyType::UnusualFrequency => 2,
706            StatisticalAnomalyType::BenfordViolation => 2,
707            StatisticalAnomalyType::UnusuallyHighAmount => 3,
708            StatisticalAnomalyType::TrendBreak => 3,
709            StatisticalAnomalyType::TransactionBurst => 4,
710            StatisticalAnomalyType::ExactDuplicateAmount => 3,
711            _ => 3,
712        }
713    }
714}
715
716/// Relational/graph anomaly types.
717#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
718pub enum RelationalAnomalyType {
719    // Transaction Pattern Anomalies
720    /// Circular transaction pattern.
721    CircularTransaction,
722    /// Unusual account combination.
723    UnusualAccountPair,
724    /// New trading partner.
725    NewCounterparty,
726    /// Dormant account suddenly active.
727    DormantAccountActivity,
728
729    // Network Anomalies
730    /// Unusual network centrality.
731    CentralityAnomaly,
732    /// Isolated transaction cluster.
733    IsolatedCluster,
734    /// Bridge node anomaly.
735    BridgeNodeAnomaly,
736    /// Community structure change.
737    CommunityAnomaly,
738
739    // Relationship Anomalies
740    /// Missing expected relationship.
741    MissingRelationship,
742    /// Unexpected relationship.
743    UnexpectedRelationship,
744    /// Relationship strength change.
745    RelationshipStrengthChange,
746
747    // Intercompany Anomalies
748    /// Unmatched intercompany transaction.
749    UnmatchedIntercompany,
750    /// Circular intercompany flow.
751    CircularIntercompany,
752    /// Transfer pricing anomaly.
753    TransferPricingAnomaly,
754}
755
756impl RelationalAnomalyType {
757    /// Returns severity level (1-5).
758    pub fn severity(&self) -> u8 {
759        match self {
760            RelationalAnomalyType::NewCounterparty => 1,
761            RelationalAnomalyType::DormantAccountActivity => 2,
762            RelationalAnomalyType::UnusualAccountPair => 2,
763            RelationalAnomalyType::CircularTransaction => 4,
764            RelationalAnomalyType::CircularIntercompany => 4,
765            RelationalAnomalyType::TransferPricingAnomaly => 4,
766            RelationalAnomalyType::UnmatchedIntercompany => 3,
767            _ => 3,
768        }
769    }
770}
771
772/// A labeled anomaly for supervised learning.
773#[derive(Debug, Clone, Serialize, Deserialize)]
774pub struct LabeledAnomaly {
775    /// Unique anomaly identifier.
776    pub anomaly_id: String,
777    /// Type of anomaly.
778    pub anomaly_type: AnomalyType,
779    /// Document or entity that contains the anomaly.
780    pub document_id: String,
781    /// Document type (JE, PO, Invoice, etc.).
782    pub document_type: String,
783    /// Company code.
784    pub company_code: String,
785    /// Date the anomaly occurred.
786    pub anomaly_date: NaiveDate,
787    /// Timestamp when detected/injected.
788    #[serde(with = "crate::serde_timestamp::naive")]
789    pub detection_timestamp: NaiveDateTime,
790    /// Confidence score (0.0 - 1.0) for injected anomalies.
791    pub confidence: f64,
792    /// Severity (1-5).
793    pub severity: u8,
794    /// Description of the anomaly.
795    pub description: String,
796    /// Related entities (user IDs, account codes, etc.).
797    pub related_entities: Vec<String>,
798    /// Monetary impact if applicable.
799    pub monetary_impact: Option<Decimal>,
800    /// Additional metadata.
801    pub metadata: HashMap<String, String>,
802    /// Whether this was injected (true) or naturally occurring (false).
803    pub is_injected: bool,
804    /// Injection strategy used (if injected) - legacy string field.
805    pub injection_strategy: Option<String>,
806    /// Cluster ID if part of an anomaly cluster.
807    pub cluster_id: Option<String>,
808
809    // ========================================
810    // PROVENANCE TRACKING FIELDS (Phase 1.2)
811    // ========================================
812    /// Hash of the original document before modification.
813    /// Enables tracking what the document looked like pre-injection.
814    #[serde(default, skip_serializing_if = "Option::is_none")]
815    pub original_document_hash: Option<String>,
816
817    /// Causal reason explaining why this anomaly was injected.
818    /// Provides "why" tracking for each anomaly.
819    #[serde(default, skip_serializing_if = "Option::is_none")]
820    pub causal_reason: Option<AnomalyCausalReason>,
821
822    /// Structured injection strategy with parameters.
823    /// More detailed than the legacy string-based injection_strategy field.
824    #[serde(default, skip_serializing_if = "Option::is_none")]
825    pub structured_strategy: Option<InjectionStrategy>,
826
827    /// Parent anomaly ID if this was derived from another anomaly.
828    /// Enables anomaly transformation chains.
829    #[serde(default, skip_serializing_if = "Option::is_none")]
830    pub parent_anomaly_id: Option<String>,
831
832    /// Child anomaly IDs that were derived from this anomaly.
833    #[serde(default, skip_serializing_if = "Vec::is_empty")]
834    pub child_anomaly_ids: Vec<String>,
835
836    /// Scenario ID if this anomaly is part of a multi-step scenario.
837    #[serde(default, skip_serializing_if = "Option::is_none")]
838    pub scenario_id: Option<String>,
839
840    /// Generation run ID that produced this anomaly.
841    /// Enables tracing anomalies back to their generation run.
842    #[serde(default, skip_serializing_if = "Option::is_none")]
843    pub run_id: Option<String>,
844
845    /// Seed used for RNG during generation.
846    /// Enables reproducibility.
847    #[serde(default, skip_serializing_if = "Option::is_none")]
848    pub generation_seed: Option<u64>,
849}
850
851impl LabeledAnomaly {
852    /// Creates a new labeled anomaly.
853    pub fn new(
854        anomaly_id: String,
855        anomaly_type: AnomalyType,
856        document_id: String,
857        document_type: String,
858        company_code: String,
859        anomaly_date: NaiveDate,
860    ) -> Self {
861        let severity = anomaly_type.severity();
862        let description = format!(
863            "{} - {} in document {}",
864            anomaly_type.category(),
865            anomaly_type.type_name(),
866            document_id
867        );
868
869        Self {
870            anomaly_id,
871            anomaly_type,
872            document_id,
873            document_type,
874            company_code,
875            anomaly_date,
876            detection_timestamp: chrono::Local::now().naive_local(),
877            confidence: 1.0,
878            severity,
879            description,
880            related_entities: Vec::new(),
881            monetary_impact: None,
882            metadata: HashMap::new(),
883            is_injected: true,
884            injection_strategy: None,
885            cluster_id: None,
886            // Provenance fields
887            original_document_hash: None,
888            causal_reason: None,
889            structured_strategy: None,
890            parent_anomaly_id: None,
891            child_anomaly_ids: Vec::new(),
892            scenario_id: None,
893            run_id: None,
894            generation_seed: None,
895        }
896    }
897
898    /// Sets the description.
899    pub fn with_description(mut self, description: &str) -> Self {
900        self.description = description.to_string();
901        self
902    }
903
904    /// Sets the monetary impact.
905    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
906        self.monetary_impact = Some(impact);
907        self
908    }
909
910    /// Adds a related entity.
911    pub fn with_related_entity(mut self, entity: &str) -> Self {
912        self.related_entities.push(entity.to_string());
913        self
914    }
915
916    /// Adds metadata.
917    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
918        self.metadata.insert(key.to_string(), value.to_string());
919        self
920    }
921
922    /// Sets the injection strategy (legacy string).
923    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
924        self.injection_strategy = Some(strategy.to_string());
925        self
926    }
927
928    /// Sets the cluster ID.
929    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
930        self.cluster_id = Some(cluster_id.to_string());
931        self
932    }
933
934    // ========================================
935    // PROVENANCE BUILDER METHODS (Phase 1.2)
936    // ========================================
937
938    /// Sets the original document hash for provenance tracking.
939    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
940        self.original_document_hash = Some(hash.to_string());
941        self
942    }
943
944    /// Sets the causal reason for this anomaly.
945    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
946        self.causal_reason = Some(reason);
947        self
948    }
949
950    /// Sets the structured injection strategy.
951    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
952        // Also set the legacy string field for backward compatibility
953        self.injection_strategy = Some(strategy.strategy_type().to_string());
954        self.structured_strategy = Some(strategy);
955        self
956    }
957
958    /// Sets the parent anomaly ID (for anomaly derivation chains).
959    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
960        self.parent_anomaly_id = Some(parent_id.to_string());
961        self
962    }
963
964    /// Adds a child anomaly ID.
965    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
966        self.child_anomaly_ids.push(child_id.to_string());
967        self
968    }
969
970    /// Sets the scenario ID for multi-step scenario tracking.
971    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
972        self.scenario_id = Some(scenario_id.to_string());
973        self
974    }
975
976    /// Sets the generation run ID.
977    pub fn with_run_id(mut self, run_id: &str) -> Self {
978        self.run_id = Some(run_id.to_string());
979        self
980    }
981
982    /// Sets the generation seed for reproducibility.
983    pub fn with_generation_seed(mut self, seed: u64) -> Self {
984        self.generation_seed = Some(seed);
985        self
986    }
987
988    /// Sets multiple provenance fields at once for convenience.
989    pub fn with_provenance(
990        mut self,
991        run_id: Option<&str>,
992        seed: Option<u64>,
993        causal_reason: Option<AnomalyCausalReason>,
994    ) -> Self {
995        if let Some(id) = run_id {
996            self.run_id = Some(id.to_string());
997        }
998        self.generation_seed = seed;
999        self.causal_reason = causal_reason;
1000        self
1001    }
1002
1003    /// Converts to a feature vector for ML.
1004    ///
1005    /// Returns a vector of 15 features:
1006    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1007    /// - 1 feature: Severity (normalized 0-1)
1008    /// - 1 feature: Confidence
1009    /// - 1 feature: Has monetary impact (0/1)
1010    /// - 1 feature: Monetary impact (log-scaled)
1011    /// - 1 feature: Is intentional (0/1)
1012    /// - 1 feature: Number of related entities
1013    /// - 1 feature: Is part of cluster (0/1)
1014    /// - 1 feature: Is part of scenario (0/1)
1015    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1016    pub fn to_features(&self) -> Vec<f64> {
1017        let mut features = Vec::new();
1018
1019        // Category one-hot encoding
1020        let categories = [
1021            "Fraud",
1022            "Error",
1023            "ProcessIssue",
1024            "Statistical",
1025            "Relational",
1026            "Custom",
1027        ];
1028        for cat in &categories {
1029            features.push(if self.anomaly_type.category() == *cat {
1030                1.0
1031            } else {
1032                0.0
1033            });
1034        }
1035
1036        // Severity (normalized)
1037        features.push(self.severity as f64 / 5.0);
1038
1039        // Confidence
1040        features.push(self.confidence);
1041
1042        // Has monetary impact
1043        features.push(if self.monetary_impact.is_some() {
1044            1.0
1045        } else {
1046            0.0
1047        });
1048
1049        // Monetary impact (log-scaled)
1050        if let Some(impact) = self.monetary_impact {
1051            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1052            features.push((impact_f64.abs() + 1.0).ln());
1053        } else {
1054            features.push(0.0);
1055        }
1056
1057        // Is intentional
1058        features.push(if self.anomaly_type.is_intentional() {
1059            1.0
1060        } else {
1061            0.0
1062        });
1063
1064        // Number of related entities
1065        features.push(self.related_entities.len() as f64);
1066
1067        // Is part of cluster
1068        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1069
1070        // Provenance features
1071        // Is part of scenario
1072        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1073
1074        // Has parent anomaly (indicates this is a derived anomaly)
1075        features.push(if self.parent_anomaly_id.is_some() {
1076            1.0
1077        } else {
1078            0.0
1079        });
1080
1081        features
1082    }
1083
1084    /// Returns the number of features in the feature vector.
1085    pub fn feature_count() -> usize {
1086        15 // 6 category + 9 other features
1087    }
1088
1089    /// Returns feature names for documentation/ML metadata.
1090    pub fn feature_names() -> Vec<&'static str> {
1091        vec![
1092            "category_fraud",
1093            "category_error",
1094            "category_process_issue",
1095            "category_statistical",
1096            "category_relational",
1097            "category_custom",
1098            "severity_normalized",
1099            "confidence",
1100            "has_monetary_impact",
1101            "monetary_impact_log",
1102            "is_intentional",
1103            "related_entity_count",
1104            "is_clustered",
1105            "is_scenario_part",
1106            "is_derived",
1107        ]
1108    }
1109}
1110
1111/// Summary of anomalies for reporting.
1112#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1113pub struct AnomalySummary {
1114    /// Total anomaly count.
1115    pub total_count: usize,
1116    /// Count by category.
1117    pub by_category: HashMap<String, usize>,
1118    /// Count by specific type.
1119    pub by_type: HashMap<String, usize>,
1120    /// Count by severity.
1121    pub by_severity: HashMap<u8, usize>,
1122    /// Count by company.
1123    pub by_company: HashMap<String, usize>,
1124    /// Total monetary impact.
1125    pub total_monetary_impact: Decimal,
1126    /// Date range.
1127    pub date_range: Option<(NaiveDate, NaiveDate)>,
1128    /// Number of clusters.
1129    pub cluster_count: usize,
1130}
1131
1132impl AnomalySummary {
1133    /// Creates a summary from a list of anomalies.
1134    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1135        let mut summary = AnomalySummary {
1136            total_count: anomalies.len(),
1137            ..Default::default()
1138        };
1139
1140        let mut min_date: Option<NaiveDate> = None;
1141        let mut max_date: Option<NaiveDate> = None;
1142        let mut clusters = std::collections::HashSet::new();
1143
1144        for anomaly in anomalies {
1145            // By category
1146            *summary
1147                .by_category
1148                .entry(anomaly.anomaly_type.category().to_string())
1149                .or_insert(0) += 1;
1150
1151            // By type
1152            *summary
1153                .by_type
1154                .entry(anomaly.anomaly_type.type_name())
1155                .or_insert(0) += 1;
1156
1157            // By severity
1158            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1159
1160            // By company
1161            *summary
1162                .by_company
1163                .entry(anomaly.company_code.clone())
1164                .or_insert(0) += 1;
1165
1166            // Monetary impact
1167            if let Some(impact) = anomaly.monetary_impact {
1168                summary.total_monetary_impact += impact;
1169            }
1170
1171            // Date range
1172            match min_date {
1173                None => min_date = Some(anomaly.anomaly_date),
1174                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1175                _ => {}
1176            }
1177            match max_date {
1178                None => max_date = Some(anomaly.anomaly_date),
1179                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1180                _ => {}
1181            }
1182
1183            // Clusters
1184            if let Some(cluster_id) = &anomaly.cluster_id {
1185                clusters.insert(cluster_id.clone());
1186            }
1187        }
1188
1189        summary.date_range = min_date.zip(max_date);
1190        summary.cluster_count = clusters.len();
1191
1192        summary
1193    }
1194}
1195
1196// ============================================================================
1197// ENHANCED ANOMALY TAXONOMY (FR-003)
1198// ============================================================================
1199
1200/// High-level anomaly category for multi-class classification.
1201///
1202/// These categories provide a more granular classification than the base
1203/// AnomalyType enum, enabling better ML model training and audit reporting.
1204#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1205pub enum AnomalyCategory {
1206    // Vendor-related anomalies
1207    /// Fictitious or shell vendor.
1208    FictitiousVendor,
1209    /// Kickback or collusion with vendor.
1210    VendorKickback,
1211    /// Related party vendor transactions.
1212    RelatedPartyVendor,
1213
1214    // Transaction-related anomalies
1215    /// Duplicate payment or invoice.
1216    DuplicatePayment,
1217    /// Unauthorized transaction.
1218    UnauthorizedTransaction,
1219    /// Structured transactions to avoid thresholds.
1220    StructuredTransaction,
1221
1222    // Pattern-based anomalies
1223    /// Circular flow of funds.
1224    CircularFlow,
1225    /// Behavioral anomaly (deviation from normal patterns).
1226    BehavioralAnomaly,
1227    /// Timing-based anomaly.
1228    TimingAnomaly,
1229
1230    // Journal entry anomalies
1231    /// Manual journal entry anomaly.
1232    JournalAnomaly,
1233    /// Manual override of controls.
1234    ManualOverride,
1235    /// Missing approval in chain.
1236    MissingApproval,
1237
1238    // Statistical anomalies
1239    /// Statistical outlier.
1240    StatisticalOutlier,
1241    /// Distribution anomaly (Benford, etc.).
1242    DistributionAnomaly,
1243
1244    // Custom category
1245    /// User-defined category.
1246    Custom(String),
1247}
1248
1249impl AnomalyCategory {
1250    /// Derives an AnomalyCategory from an AnomalyType.
1251    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1252        match anomaly_type {
1253            AnomalyType::Fraud(fraud_type) => match fraud_type {
1254                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1255                    AnomalyCategory::FictitiousVendor
1256                }
1257                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1258                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1259                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1260                    AnomalyCategory::StructuredTransaction
1261                }
1262                FraudType::SelfApproval
1263                | FraudType::UnauthorizedApproval
1264                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1265                FraudType::TimingAnomaly
1266                | FraudType::RoundDollarManipulation
1267                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1268                _ => AnomalyCategory::BehavioralAnomaly,
1269            },
1270            AnomalyType::Error(error_type) => match error_type {
1271                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1272                ErrorType::WrongPeriod
1273                | ErrorType::BackdatedEntry
1274                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1275                _ => AnomalyCategory::JournalAnomaly,
1276            },
1277            AnomalyType::ProcessIssue(process_type) => match process_type {
1278                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1279                    AnomalyCategory::MissingApproval
1280                }
1281                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1282                    AnomalyCategory::ManualOverride
1283                }
1284                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1285                    AnomalyCategory::TimingAnomaly
1286                }
1287                _ => AnomalyCategory::BehavioralAnomaly,
1288            },
1289            AnomalyType::Statistical(stat_type) => match stat_type {
1290                StatisticalAnomalyType::BenfordViolation
1291                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1292                _ => AnomalyCategory::StatisticalOutlier,
1293            },
1294            AnomalyType::Relational(rel_type) => match rel_type {
1295                RelationalAnomalyType::CircularTransaction
1296                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1297                _ => AnomalyCategory::BehavioralAnomaly,
1298            },
1299            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1300        }
1301    }
1302
1303    /// Returns the category name as a string.
1304    pub fn name(&self) -> &str {
1305        match self {
1306            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1307            AnomalyCategory::VendorKickback => "vendor_kickback",
1308            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1309            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1310            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1311            AnomalyCategory::StructuredTransaction => "structured_transaction",
1312            AnomalyCategory::CircularFlow => "circular_flow",
1313            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1314            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1315            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1316            AnomalyCategory::ManualOverride => "manual_override",
1317            AnomalyCategory::MissingApproval => "missing_approval",
1318            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1319            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1320            AnomalyCategory::Custom(s) => s.as_str(),
1321        }
1322    }
1323
1324    /// Returns the ordinal value for ML encoding.
1325    pub fn ordinal(&self) -> u8 {
1326        match self {
1327            AnomalyCategory::FictitiousVendor => 0,
1328            AnomalyCategory::VendorKickback => 1,
1329            AnomalyCategory::RelatedPartyVendor => 2,
1330            AnomalyCategory::DuplicatePayment => 3,
1331            AnomalyCategory::UnauthorizedTransaction => 4,
1332            AnomalyCategory::StructuredTransaction => 5,
1333            AnomalyCategory::CircularFlow => 6,
1334            AnomalyCategory::BehavioralAnomaly => 7,
1335            AnomalyCategory::TimingAnomaly => 8,
1336            AnomalyCategory::JournalAnomaly => 9,
1337            AnomalyCategory::ManualOverride => 10,
1338            AnomalyCategory::MissingApproval => 11,
1339            AnomalyCategory::StatisticalOutlier => 12,
1340            AnomalyCategory::DistributionAnomaly => 13,
1341            AnomalyCategory::Custom(_) => 14,
1342        }
1343    }
1344
1345    /// Returns the total number of categories (excluding Custom).
1346    pub fn category_count() -> usize {
1347        15 // 14 fixed categories + Custom
1348    }
1349}
1350
1351/// Type of contributing factor for anomaly confidence/severity calculation.
1352#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1353pub enum FactorType {
1354    /// Amount deviation from expected value.
1355    AmountDeviation,
1356    /// Proximity to approval/reporting threshold.
1357    ThresholdProximity,
1358    /// Timing-related anomaly indicator.
1359    TimingAnomaly,
1360    /// Entity risk score contribution.
1361    EntityRisk,
1362    /// Pattern match confidence.
1363    PatternMatch,
1364    /// Frequency deviation from normal.
1365    FrequencyDeviation,
1366    /// Relationship-based anomaly indicator.
1367    RelationshipAnomaly,
1368    /// Control bypass indicator.
1369    ControlBypass,
1370    /// Benford's Law violation.
1371    BenfordViolation,
1372    /// Duplicate indicator.
1373    DuplicateIndicator,
1374    /// Approval chain issue.
1375    ApprovalChainIssue,
1376    /// Documentation gap.
1377    DocumentationGap,
1378    /// Custom factor type.
1379    Custom,
1380}
1381
1382impl FactorType {
1383    /// Returns the factor type name.
1384    pub fn name(&self) -> &'static str {
1385        match self {
1386            FactorType::AmountDeviation => "amount_deviation",
1387            FactorType::ThresholdProximity => "threshold_proximity",
1388            FactorType::TimingAnomaly => "timing_anomaly",
1389            FactorType::EntityRisk => "entity_risk",
1390            FactorType::PatternMatch => "pattern_match",
1391            FactorType::FrequencyDeviation => "frequency_deviation",
1392            FactorType::RelationshipAnomaly => "relationship_anomaly",
1393            FactorType::ControlBypass => "control_bypass",
1394            FactorType::BenfordViolation => "benford_violation",
1395            FactorType::DuplicateIndicator => "duplicate_indicator",
1396            FactorType::ApprovalChainIssue => "approval_chain_issue",
1397            FactorType::DocumentationGap => "documentation_gap",
1398            FactorType::Custom => "custom",
1399        }
1400    }
1401}
1402
1403/// Evidence supporting a contributing factor.
1404#[derive(Debug, Clone, Serialize, Deserialize)]
1405pub struct FactorEvidence {
1406    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
1407    pub source: String,
1408    /// Raw evidence data.
1409    pub data: HashMap<String, String>,
1410}
1411
1412/// A contributing factor to anomaly confidence/severity.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414pub struct ContributingFactor {
1415    /// Type of factor.
1416    pub factor_type: FactorType,
1417    /// Observed value.
1418    pub value: f64,
1419    /// Threshold or expected value.
1420    pub threshold: f64,
1421    /// Direction of comparison (true = value > threshold is anomalous).
1422    pub direction_greater: bool,
1423    /// Weight of this factor in overall calculation (0.0 - 1.0).
1424    pub weight: f64,
1425    /// Human-readable description.
1426    pub description: String,
1427    /// Optional supporting evidence.
1428    pub evidence: Option<FactorEvidence>,
1429}
1430
1431impl ContributingFactor {
1432    /// Creates a new contributing factor.
1433    pub fn new(
1434        factor_type: FactorType,
1435        value: f64,
1436        threshold: f64,
1437        direction_greater: bool,
1438        weight: f64,
1439        description: &str,
1440    ) -> Self {
1441        Self {
1442            factor_type,
1443            value,
1444            threshold,
1445            direction_greater,
1446            weight,
1447            description: description.to_string(),
1448            evidence: None,
1449        }
1450    }
1451
1452    /// Adds evidence to the factor.
1453    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1454        self.evidence = Some(FactorEvidence {
1455            source: source.to_string(),
1456            data,
1457        });
1458        self
1459    }
1460
1461    /// Calculates the factor's contribution to anomaly score.
1462    pub fn contribution(&self) -> f64 {
1463        let deviation = if self.direction_greater {
1464            (self.value - self.threshold).max(0.0)
1465        } else {
1466            (self.threshold - self.value).max(0.0)
1467        };
1468
1469        // Normalize by threshold to get relative deviation
1470        let relative_deviation = if self.threshold.abs() > 0.001 {
1471            deviation / self.threshold.abs()
1472        } else {
1473            deviation
1474        };
1475
1476        // Apply weight and cap at 1.0
1477        (relative_deviation * self.weight).min(1.0)
1478    }
1479}
1480
1481/// Enhanced anomaly label with dynamic confidence and severity.
1482#[derive(Debug, Clone, Serialize, Deserialize)]
1483pub struct EnhancedAnomalyLabel {
1484    /// Base labeled anomaly (backward compatible).
1485    pub base: LabeledAnomaly,
1486    /// Enhanced category classification.
1487    pub category: AnomalyCategory,
1488    /// Dynamically calculated confidence (0.0 - 1.0).
1489    pub enhanced_confidence: f64,
1490    /// Contextually calculated severity (0.0 - 1.0).
1491    pub enhanced_severity: f64,
1492    /// Factors contributing to confidence/severity.
1493    pub contributing_factors: Vec<ContributingFactor>,
1494    /// Secondary categories (for multi-label classification).
1495    pub secondary_categories: Vec<AnomalyCategory>,
1496}
1497
1498impl EnhancedAnomalyLabel {
1499    /// Creates an enhanced label from a base labeled anomaly.
1500    pub fn from_base(base: LabeledAnomaly) -> Self {
1501        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1502        let enhanced_confidence = base.confidence;
1503        let enhanced_severity = base.severity as f64 / 5.0;
1504
1505        Self {
1506            base,
1507            category,
1508            enhanced_confidence,
1509            enhanced_severity,
1510            contributing_factors: Vec::new(),
1511            secondary_categories: Vec::new(),
1512        }
1513    }
1514
1515    /// Sets the enhanced confidence.
1516    pub fn with_confidence(mut self, confidence: f64) -> Self {
1517        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1518        self
1519    }
1520
1521    /// Sets the enhanced severity.
1522    pub fn with_severity(mut self, severity: f64) -> Self {
1523        self.enhanced_severity = severity.clamp(0.0, 1.0);
1524        self
1525    }
1526
1527    /// Adds a contributing factor.
1528    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1529        self.contributing_factors.push(factor);
1530        self
1531    }
1532
1533    /// Adds a secondary category.
1534    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1535        if !self.secondary_categories.contains(&category) && category != self.category {
1536            self.secondary_categories.push(category);
1537        }
1538        self
1539    }
1540
1541    /// Converts to an extended feature vector.
1542    ///
1543    /// Returns base features (15) + enhanced features (10) = 25 features.
1544    pub fn to_features(&self) -> Vec<f64> {
1545        let mut features = self.base.to_features();
1546
1547        // Enhanced features
1548        features.push(self.enhanced_confidence);
1549        features.push(self.enhanced_severity);
1550        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1551        features.push(self.secondary_categories.len() as f64);
1552        features.push(self.contributing_factors.len() as f64);
1553
1554        // Max factor weight
1555        let max_weight = self
1556            .contributing_factors
1557            .iter()
1558            .map(|f| f.weight)
1559            .fold(0.0, f64::max);
1560        features.push(max_weight);
1561
1562        // Factor type indicators (binary flags for key factor types)
1563        let has_control_bypass = self
1564            .contributing_factors
1565            .iter()
1566            .any(|f| f.factor_type == FactorType::ControlBypass);
1567        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1568
1569        let has_amount_deviation = self
1570            .contributing_factors
1571            .iter()
1572            .any(|f| f.factor_type == FactorType::AmountDeviation);
1573        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1574
1575        let has_timing = self
1576            .contributing_factors
1577            .iter()
1578            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1579        features.push(if has_timing { 1.0 } else { 0.0 });
1580
1581        let has_pattern_match = self
1582            .contributing_factors
1583            .iter()
1584            .any(|f| f.factor_type == FactorType::PatternMatch);
1585        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1586
1587        features
1588    }
1589
1590    /// Returns the number of features in the enhanced feature vector.
1591    pub fn feature_count() -> usize {
1592        25 // 15 base + 10 enhanced
1593    }
1594
1595    /// Returns feature names for the enhanced feature vector.
1596    pub fn feature_names() -> Vec<&'static str> {
1597        let mut names = LabeledAnomaly::feature_names();
1598        names.extend(vec![
1599            "enhanced_confidence",
1600            "enhanced_severity",
1601            "category_ordinal",
1602            "secondary_category_count",
1603            "contributing_factor_count",
1604            "max_factor_weight",
1605            "has_control_bypass",
1606            "has_amount_deviation",
1607            "has_timing_factor",
1608            "has_pattern_match",
1609        ]);
1610        names
1611    }
1612}
1613
1614// ============================================================================
1615// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1616// ============================================================================
1617
1618/// Severity level classification for anomalies.
1619#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1620pub enum SeverityLevel {
1621    /// Minor issue, low impact.
1622    Low,
1623    /// Moderate issue, noticeable impact.
1624    #[default]
1625    Medium,
1626    /// Significant issue, substantial impact.
1627    High,
1628    /// Critical issue, severe impact requiring immediate attention.
1629    Critical,
1630}
1631
1632impl SeverityLevel {
1633    /// Returns the numeric value (1-4) for the severity level.
1634    pub fn numeric(&self) -> u8 {
1635        match self {
1636            SeverityLevel::Low => 1,
1637            SeverityLevel::Medium => 2,
1638            SeverityLevel::High => 3,
1639            SeverityLevel::Critical => 4,
1640        }
1641    }
1642
1643    /// Creates a severity level from a numeric value.
1644    pub fn from_numeric(value: u8) -> Self {
1645        match value {
1646            1 => SeverityLevel::Low,
1647            2 => SeverityLevel::Medium,
1648            3 => SeverityLevel::High,
1649            _ => SeverityLevel::Critical,
1650        }
1651    }
1652
1653    /// Creates a severity level from a normalized score (0.0-1.0).
1654    pub fn from_score(score: f64) -> Self {
1655        match score {
1656            s if s < 0.25 => SeverityLevel::Low,
1657            s if s < 0.50 => SeverityLevel::Medium,
1658            s if s < 0.75 => SeverityLevel::High,
1659            _ => SeverityLevel::Critical,
1660        }
1661    }
1662
1663    /// Returns a normalized score (0.0-1.0) for this severity level.
1664    pub fn to_score(&self) -> f64 {
1665        match self {
1666            SeverityLevel::Low => 0.125,
1667            SeverityLevel::Medium => 0.375,
1668            SeverityLevel::High => 0.625,
1669            SeverityLevel::Critical => 0.875,
1670        }
1671    }
1672}
1673
1674/// Structured severity scoring for anomalies.
1675#[derive(Debug, Clone, Serialize, Deserialize)]
1676pub struct AnomalySeverity {
1677    /// Severity level classification.
1678    pub level: SeverityLevel,
1679    /// Continuous severity score (0.0-1.0).
1680    pub score: f64,
1681    /// Absolute financial impact amount.
1682    pub financial_impact: Decimal,
1683    /// Whether this exceeds materiality threshold.
1684    pub is_material: bool,
1685    /// Materiality threshold used for determination.
1686    #[serde(default, skip_serializing_if = "Option::is_none")]
1687    pub materiality_threshold: Option<Decimal>,
1688}
1689
1690impl AnomalySeverity {
1691    /// Creates a new severity assessment.
1692    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1693        Self {
1694            level,
1695            score: level.to_score(),
1696            financial_impact,
1697            is_material: false,
1698            materiality_threshold: None,
1699        }
1700    }
1701
1702    /// Creates severity from a score, auto-determining level.
1703    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1704        Self {
1705            level: SeverityLevel::from_score(score),
1706            score: score.clamp(0.0, 1.0),
1707            financial_impact,
1708            is_material: false,
1709            materiality_threshold: None,
1710        }
1711    }
1712
1713    /// Sets the materiality assessment.
1714    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1715        self.materiality_threshold = Some(threshold);
1716        self.is_material = self.financial_impact.abs() >= threshold;
1717        self
1718    }
1719}
1720
1721impl Default for AnomalySeverity {
1722    fn default() -> Self {
1723        Self {
1724            level: SeverityLevel::Medium,
1725            score: 0.5,
1726            financial_impact: Decimal::ZERO,
1727            is_material: false,
1728            materiality_threshold: None,
1729        }
1730    }
1731}
1732
/// Detection difficulty classification for anomalies.
///
/// Categorizes how difficult an anomaly is to detect, which is useful
/// for ML model benchmarking and audit procedure selection. Variants are
/// declared from easiest to hardest; the default is `Moderate`.
///
/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
/// is used for drift event classification and has different variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AnomalyDetectionDifficulty {
    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
    Trivial,
    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
    Easy,
    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
    ///
    /// This is the `Default` variant.
    #[default]
    Moderate,
    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
    Hard,
    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
    Expert,
}
1754
1755impl AnomalyDetectionDifficulty {
1756    /// Returns the expected detection rate for this difficulty level.
1757    pub fn expected_detection_rate(&self) -> f64 {
1758        match self {
1759            AnomalyDetectionDifficulty::Trivial => 0.99,
1760            AnomalyDetectionDifficulty::Easy => 0.90,
1761            AnomalyDetectionDifficulty::Moderate => 0.70,
1762            AnomalyDetectionDifficulty::Hard => 0.40,
1763            AnomalyDetectionDifficulty::Expert => 0.15,
1764        }
1765    }
1766
1767    /// Returns a numeric difficulty score (0.0-1.0).
1768    pub fn difficulty_score(&self) -> f64 {
1769        match self {
1770            AnomalyDetectionDifficulty::Trivial => 0.05,
1771            AnomalyDetectionDifficulty::Easy => 0.25,
1772            AnomalyDetectionDifficulty::Moderate => 0.50,
1773            AnomalyDetectionDifficulty::Hard => 0.75,
1774            AnomalyDetectionDifficulty::Expert => 0.95,
1775        }
1776    }
1777
1778    /// Creates a difficulty level from a score (0.0-1.0).
1779    pub fn from_score(score: f64) -> Self {
1780        match score {
1781            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1782            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1783            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1784            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1785            _ => AnomalyDetectionDifficulty::Expert,
1786        }
1787    }
1788
1789    /// Returns the name of this difficulty level.
1790    pub fn name(&self) -> &'static str {
1791        match self {
1792            AnomalyDetectionDifficulty::Trivial => "trivial",
1793            AnomalyDetectionDifficulty::Easy => "easy",
1794            AnomalyDetectionDifficulty::Moderate => "moderate",
1795            AnomalyDetectionDifficulty::Hard => "hard",
1796            AnomalyDetectionDifficulty::Expert => "expert",
1797        }
1798    }
1799}
1800
/// Ground truth certainty level for anomaly labels.
///
/// Indicates how certain we are that the label is correct. Variants are
/// declared from most to least certain; the default is `Definite`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum GroundTruthCertainty {
    /// Definitively known (injected anomaly with full provenance).
    ///
    /// This is the `Default` variant.
    #[default]
    Definite,
    /// Highly probable based on strong evidence.
    Probable,
    /// Possibly an anomaly based on indirect evidence.
    Possible,
}
1814
1815impl GroundTruthCertainty {
1816    /// Returns a certainty score (0.0-1.0).
1817    pub fn certainty_score(&self) -> f64 {
1818        match self {
1819            GroundTruthCertainty::Definite => 1.0,
1820            GroundTruthCertainty::Probable => 0.8,
1821            GroundTruthCertainty::Possible => 0.5,
1822        }
1823    }
1824
1825    /// Returns the name of this certainty level.
1826    pub fn name(&self) -> &'static str {
1827        match self {
1828            GroundTruthCertainty::Definite => "definite",
1829            GroundTruthCertainty::Probable => "probable",
1830            GroundTruthCertainty::Possible => "possible",
1831        }
1832    }
1833}
1834
/// Detection method classification.
///
/// Indicates which detection methods are recommended or effective for an anomaly.
/// `ExtendedAnomalyLabel::recommended_methods` holds a list of these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DetectionMethod {
    /// Simple rule-based detection (thresholds, filters).
    RuleBased,
    /// Statistical analysis (distributions, outlier detection).
    Statistical,
    /// Machine learning models (classification, anomaly detection).
    MachineLearning,
    /// Graph-based analysis (network patterns, relationships).
    GraphBased,
    /// Manual forensic audit procedures.
    ForensicAudit,
    /// Combination of multiple methods.
    Hybrid,
}
1853
1854impl DetectionMethod {
1855    /// Returns the name of this detection method.
1856    pub fn name(&self) -> &'static str {
1857        match self {
1858            DetectionMethod::RuleBased => "rule_based",
1859            DetectionMethod::Statistical => "statistical",
1860            DetectionMethod::MachineLearning => "machine_learning",
1861            DetectionMethod::GraphBased => "graph_based",
1862            DetectionMethod::ForensicAudit => "forensic_audit",
1863            DetectionMethod::Hybrid => "hybrid",
1864        }
1865    }
1866
1867    /// Returns a description of this detection method.
1868    pub fn description(&self) -> &'static str {
1869        match self {
1870            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1871            DetectionMethod::Statistical => "Statistical distribution analysis",
1872            DetectionMethod::MachineLearning => "ML classification models",
1873            DetectionMethod::GraphBased => "Network and relationship analysis",
1874            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1875            DetectionMethod::Hybrid => "Combined multi-method approach",
1876        }
1877    }
1878}
1879
/// Extended anomaly label with comprehensive multi-dimensional classification.
///
/// Wraps a base [`LabeledAnomaly`] and adds fields for severity scoring,
/// detection difficulty, recommended methods, ground truth certainty, scheme
/// membership, and near-miss marking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtendedAnomalyLabel {
    /// Base labeled anomaly.
    pub base: LabeledAnomaly,
    /// Enhanced (primary) category classification.
    pub category: AnomalyCategory,
    /// Structured severity assessment.
    pub severity: AnomalySeverity,
    /// Detection difficulty classification.
    pub detection_difficulty: AnomalyDetectionDifficulty,
    /// Recommended detection methods for this anomaly.
    pub recommended_methods: Vec<DetectionMethod>,
    /// Key indicators that should trigger detection.
    pub key_indicators: Vec<String>,
    /// Ground truth certainty level.
    pub ground_truth_certainty: GroundTruthCertainty,
    /// Contributing factors to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Related entity IDs (vendors, customers, employees, etc.).
    pub related_entity_ids: Vec<String>,
    /// Secondary categories for multi-label classification.
    pub secondary_categories: Vec<AnomalyCategory>,
    /// Scheme ID if part of a multi-stage fraud scheme.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_id: Option<String>,
    /// Stage number within a scheme (1-indexed).
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_stage: Option<u32>,
    /// Whether this is a near-miss (suspicious but legitimate).
    ///
    /// Defaults to `false` when the field is missing on deserialization.
    #[serde(default)]
    pub is_near_miss: bool,
    /// Explanation if this is a near-miss.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub near_miss_explanation: Option<String>,
}
1919
1920impl ExtendedAnomalyLabel {
1921    /// Creates an extended label from a base labeled anomaly.
1922    pub fn from_base(base: LabeledAnomaly) -> Self {
1923        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1924        let severity = AnomalySeverity {
1925            level: SeverityLevel::from_numeric(base.severity),
1926            score: base.severity as f64 / 5.0,
1927            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1928            is_material: false,
1929            materiality_threshold: None,
1930        };
1931
1932        Self {
1933            base,
1934            category,
1935            severity,
1936            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1937            recommended_methods: vec![DetectionMethod::RuleBased],
1938            key_indicators: Vec::new(),
1939            ground_truth_certainty: GroundTruthCertainty::Definite,
1940            contributing_factors: Vec::new(),
1941            related_entity_ids: Vec::new(),
1942            secondary_categories: Vec::new(),
1943            scheme_id: None,
1944            scheme_stage: None,
1945            is_near_miss: false,
1946            near_miss_explanation: None,
1947        }
1948    }
1949
1950    /// Sets the severity assessment.
1951    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1952        self.severity = severity;
1953        self
1954    }
1955
1956    /// Sets the detection difficulty.
1957    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1958        self.detection_difficulty = difficulty;
1959        self
1960    }
1961
1962    /// Adds a recommended detection method.
1963    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1964        if !self.recommended_methods.contains(&method) {
1965            self.recommended_methods.push(method);
1966        }
1967        self
1968    }
1969
1970    /// Sets the recommended detection methods.
1971    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1972        self.recommended_methods = methods;
1973        self
1974    }
1975
1976    /// Adds a key indicator.
1977    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1978        self.key_indicators.push(indicator.into());
1979        self
1980    }
1981
1982    /// Sets the ground truth certainty.
1983    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1984        self.ground_truth_certainty = certainty;
1985        self
1986    }
1987
1988    /// Adds a contributing factor.
1989    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1990        self.contributing_factors.push(factor);
1991        self
1992    }
1993
1994    /// Adds a related entity ID.
1995    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
1996        self.related_entity_ids.push(entity_id.into());
1997        self
1998    }
1999
2000    /// Adds a secondary category.
2001    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2002        if category != self.category && !self.secondary_categories.contains(&category) {
2003            self.secondary_categories.push(category);
2004        }
2005        self
2006    }
2007
2008    /// Sets scheme information.
2009    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2010        self.scheme_id = Some(scheme_id.into());
2011        self.scheme_stage = Some(stage);
2012        self
2013    }
2014
2015    /// Marks this as a near-miss with explanation.
2016    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2017        self.is_near_miss = true;
2018        self.near_miss_explanation = Some(explanation.into());
2019        self
2020    }
2021
2022    /// Converts to an extended feature vector for ML.
2023    ///
2024    /// Returns base features (15) + extended features (15) = 30 features.
2025    pub fn to_features(&self) -> Vec<f64> {
2026        let mut features = self.base.to_features();
2027
2028        // Extended features
2029        features.push(self.severity.score);
2030        features.push(self.severity.level.to_score());
2031        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2032        features.push(self.detection_difficulty.difficulty_score());
2033        features.push(self.detection_difficulty.expected_detection_rate());
2034        features.push(self.ground_truth_certainty.certainty_score());
2035        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2036        features.push(self.secondary_categories.len() as f64);
2037        features.push(self.contributing_factors.len() as f64);
2038        features.push(self.key_indicators.len() as f64);
2039        features.push(self.recommended_methods.len() as f64);
2040        features.push(self.related_entity_ids.len() as f64);
2041        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2042        features.push(self.scheme_stage.unwrap_or(0) as f64);
2043        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2044
2045        features
2046    }
2047
2048    /// Returns the number of features in the extended feature vector.
2049    pub fn feature_count() -> usize {
2050        30 // 15 base + 15 extended
2051    }
2052
2053    /// Returns feature names for the extended feature vector.
2054    pub fn feature_names() -> Vec<&'static str> {
2055        let mut names = LabeledAnomaly::feature_names();
2056        names.extend(vec![
2057            "severity_score",
2058            "severity_level_score",
2059            "is_material",
2060            "difficulty_score",
2061            "expected_detection_rate",
2062            "ground_truth_certainty",
2063            "category_ordinal",
2064            "secondary_category_count",
2065            "contributing_factor_count",
2066            "key_indicator_count",
2067            "recommended_method_count",
2068            "related_entity_count",
2069            "is_part_of_scheme",
2070            "scheme_stage",
2071            "is_near_miss",
2072        ]);
2073        names
2074    }
2075}
2076
2077// ============================================================================
2078// MULTI-STAGE FRAUD SCHEME TYPES
2079// ============================================================================
2080
/// Type of multi-stage fraud scheme.
///
/// `SchemeType::typical_stages` gives the usual number of stages for each
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SchemeType {
    /// Gradual embezzlement over time.
    GradualEmbezzlement,
    /// Revenue manipulation across periods.
    RevenueManipulation,
    /// Vendor kickback scheme.
    VendorKickback,
    /// Round-tripping funds through multiple entities.
    RoundTripping,
    /// Ghost employee scheme.
    GhostEmployee,
    /// Expense reimbursement fraud.
    ExpenseReimbursement,
    /// Inventory theft scheme.
    InventoryTheft,
    /// Custom scheme type.
    Custom,
}
2101
2102impl SchemeType {
2103    /// Returns the name of this scheme type.
2104    pub fn name(&self) -> &'static str {
2105        match self {
2106            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2107            SchemeType::RevenueManipulation => "revenue_manipulation",
2108            SchemeType::VendorKickback => "vendor_kickback",
2109            SchemeType::RoundTripping => "round_tripping",
2110            SchemeType::GhostEmployee => "ghost_employee",
2111            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2112            SchemeType::InventoryTheft => "inventory_theft",
2113            SchemeType::Custom => "custom",
2114        }
2115    }
2116
2117    /// Returns the typical number of stages for this scheme type.
2118    pub fn typical_stages(&self) -> u32 {
2119        match self {
2120            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2121            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2122            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2123            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2124            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2125            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2126            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2127            SchemeType::Custom => 4,
2128        }
2129    }
2130}
2131
/// Status of detection for a fraud scheme.
///
/// The default status is `Undetected`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SchemeDetectionStatus {
    /// Scheme is undetected.
    ///
    /// This is the `Default` variant.
    #[default]
    Undetected,
    /// Under investigation but not confirmed.
    UnderInvestigation,
    /// Partially detected (some transactions flagged).
    PartiallyDetected,
    /// Fully detected and confirmed.
    FullyDetected,
}
2145
/// Reference to a transaction within a scheme.
///
/// Identifies the transaction by document ID and records its date, amount,
/// the scheme stage it belongs to, and optionally the anomaly label attached
/// to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemeTransactionRef {
    /// Document ID of the transaction.
    pub document_id: String,
    /// Transaction date (calendar date without time zone).
    pub date: chrono::NaiveDate,
    /// Transaction amount.
    pub amount: Decimal,
    /// Stage this transaction belongs to.
    pub stage: u32,
    /// Anomaly ID if labeled.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub anomaly_id: Option<String>,
}
2161
/// Concealment technique used in fraud.
///
/// Each technique carries a difficulty bonus, available through
/// `ConcealmentTechnique::difficulty_bonus`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ConcealmentTechnique {
    /// Document manipulation or forgery.
    DocumentManipulation,
    /// Circumventing approval processes.
    ApprovalCircumvention,
    /// Exploiting timing (period-end, holidays).
    TimingExploitation,
    /// Transaction splitting to avoid thresholds.
    TransactionSplitting,
    /// Account misclassification.
    AccountMisclassification,
    /// Collusion with other employees.
    Collusion,
    /// Data alteration or deletion.
    DataAlteration,
    /// Creating false documentation.
    FalseDocumentation,
}
2182
2183impl ConcealmentTechnique {
2184    /// Returns the difficulty bonus this technique adds.
2185    pub fn difficulty_bonus(&self) -> f64 {
2186        match self {
2187            ConcealmentTechnique::DocumentManipulation => 0.20,
2188            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2189            ConcealmentTechnique::TimingExploitation => 0.10,
2190            ConcealmentTechnique::TransactionSplitting => 0.15,
2191            ConcealmentTechnique::AccountMisclassification => 0.10,
2192            ConcealmentTechnique::Collusion => 0.25,
2193            ConcealmentTechnique::DataAlteration => 0.20,
2194            ConcealmentTechnique::FalseDocumentation => 0.15,
2195        }
2196    }
2197}
2198
2199// ============================================================================
2200// ACFE-ALIGNED FRAUD TAXONOMY
2201// ============================================================================
2202//
2203// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2204// Nations: Occupational Fraud Classification System. This taxonomy provides
2205// ACFE-aligned categories, schemes, and calibration data.
2206
/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
///
/// ACFE Report to the Nations statistics (typical):
/// - Asset Misappropriation: 86% of cases, $100k median loss
/// - Corruption: 33% of cases, $150k median loss
/// - Financial Statement Fraud: 10% of cases, $954k median loss
///
/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
///
/// The default category is `AssetMisappropriation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AcfeFraudCategory {
    /// Theft of organizational assets (cash, inventory, equipment).
    /// Most common (86% of cases) but typically lowest median loss ($100k).
    ///
    /// This is the `Default` variant.
    #[default]
    AssetMisappropriation,
    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
    /// Medium frequency (33% of cases), medium median loss ($150k).
    Corruption,
    /// Intentional misstatement of financial statements.
    /// Least common (10% of cases) but highest median loss ($954k).
    FinancialStatementFraud,
}
2228
2229impl AcfeFraudCategory {
2230    /// Returns the name of this category.
2231    pub fn name(&self) -> &'static str {
2232        match self {
2233            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2234            AcfeFraudCategory::Corruption => "corruption",
2235            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2236        }
2237    }
2238
2239    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2240    pub fn typical_occurrence_rate(&self) -> f64 {
2241        match self {
2242            AcfeFraudCategory::AssetMisappropriation => 0.86,
2243            AcfeFraudCategory::Corruption => 0.33,
2244            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2245        }
2246    }
2247
2248    /// Returns the typical median loss amount (from ACFE reports).
2249    pub fn typical_median_loss(&self) -> Decimal {
2250        match self {
2251            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2252            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2253            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2254        }
2255    }
2256
2257    /// Returns the typical detection time in months (from ACFE reports).
2258    pub fn typical_detection_months(&self) -> u32 {
2259        match self {
2260            AcfeFraudCategory::AssetMisappropriation => 12,
2261            AcfeFraudCategory::Corruption => 18,
2262            AcfeFraudCategory::FinancialStatementFraud => 24,
2263        }
2264    }
2265}
2266
/// Cash-based fraud schemes under Asset Misappropriation.
///
/// Organized according to the ACFE Fraud Tree:
/// - Theft of Cash on Hand
/// - Theft of Cash Receipts
/// - Fraudulent Disbursements
///
/// `CashFraudScheme::subcategory` reports the subcategory string for each
/// variant, and `CashFraudScheme::all_variants` lists every variant in
/// declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CashFraudScheme {
    // ========== Theft of Cash on Hand ==========
    /// Stealing cash from cash drawers or safes after it has been recorded.
    Larceny,
    /// Stealing cash before it is recorded in the books (intercepts receipts).
    Skimming,

    // ========== Theft of Cash Receipts ==========
    /// Skimming from sales transactions before recording.
    SalesSkimming,
    /// Intercepting customer payments on accounts receivable.
    ReceivablesSkimming,
    /// Creating false refunds to pocket the difference.
    RefundSchemes,

    // ========== Fraudulent Disbursements - Billing Schemes ==========
    /// Creating fictitious vendors to invoice and pay.
    ShellCompany,
    /// Manipulating payments to legitimate vendors for personal gain.
    NonAccompliceVendor,
    /// Using company funds for personal purchases.
    PersonalPurchases,

    // ========== Fraudulent Disbursements - Payroll Schemes ==========
    /// Creating fake employees to collect wages.
    GhostEmployee,
    /// Falsifying hours worked, sales commissions, or salary rates.
    FalsifiedWages,
    /// Manipulating commission calculations.
    CommissionSchemes,

    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
    /// Claiming non-business expenses as business expenses.
    MischaracterizedExpenses,
    /// Inflating legitimate expense amounts.
    OverstatedExpenses,
    /// Creating completely fictitious expenses.
    FictitiousExpenses,

    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
    /// Forging the signature of an authorized check signer.
    ForgedMaker,
    /// Intercepting and altering the endorsement on legitimate checks.
    ForgedEndorsement,
    /// Altering the payee on a legitimate check.
    AlteredPayee,
    /// Authorized signer writing checks for personal benefit.
    AuthorizedMaker,

    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
    /// Creating false voided transactions.
    FalseVoids,
    /// Processing fictitious refunds.
    FalseRefunds,
}
2329
2330impl CashFraudScheme {
2331    /// Returns the ACFE category this scheme belongs to.
2332    pub fn category(&self) -> AcfeFraudCategory {
2333        AcfeFraudCategory::AssetMisappropriation
2334    }
2335
2336    /// Returns the subcategory within the ACFE Fraud Tree.
2337    pub fn subcategory(&self) -> &'static str {
2338        match self {
2339            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2340            CashFraudScheme::SalesSkimming
2341            | CashFraudScheme::ReceivablesSkimming
2342            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2343            CashFraudScheme::ShellCompany
2344            | CashFraudScheme::NonAccompliceVendor
2345            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2346            CashFraudScheme::GhostEmployee
2347            | CashFraudScheme::FalsifiedWages
2348            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2349            CashFraudScheme::MischaracterizedExpenses
2350            | CashFraudScheme::OverstatedExpenses
2351            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2352            CashFraudScheme::ForgedMaker
2353            | CashFraudScheme::ForgedEndorsement
2354            | CashFraudScheme::AlteredPayee
2355            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2356            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2357        }
2358    }
2359
2360    /// Returns the typical severity (1-5) for this scheme.
2361    pub fn severity(&self) -> u8 {
2362        match self {
2363            // Lower severity - often small amounts, easier to detect
2364            CashFraudScheme::FalseVoids
2365            | CashFraudScheme::FalseRefunds
2366            | CashFraudScheme::MischaracterizedExpenses => 3,
2367            // Medium severity
2368            CashFraudScheme::OverstatedExpenses
2369            | CashFraudScheme::Skimming
2370            | CashFraudScheme::Larceny
2371            | CashFraudScheme::PersonalPurchases
2372            | CashFraudScheme::FalsifiedWages => 4,
2373            // Higher severity - larger amounts, harder to detect
2374            CashFraudScheme::ShellCompany
2375            | CashFraudScheme::GhostEmployee
2376            | CashFraudScheme::FictitiousExpenses
2377            | CashFraudScheme::ForgedMaker
2378            | CashFraudScheme::AuthorizedMaker => 5,
2379            _ => 4,
2380        }
2381    }
2382
2383    /// Returns the typical detection difficulty.
2384    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2385        match self {
2386            // Easy to detect with basic controls
2387            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2388                AnomalyDetectionDifficulty::Easy
2389            }
2390            // Moderate - requires reconciliation
2391            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2392                AnomalyDetectionDifficulty::Moderate
2393            }
2394            // Hard - requires sophisticated analysis
2395            CashFraudScheme::Skimming
2396            | CashFraudScheme::ShellCompany
2397            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2398            // Expert level
2399            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2400                AnomalyDetectionDifficulty::Expert
2401            }
2402            _ => AnomalyDetectionDifficulty::Moderate,
2403        }
2404    }
2405
2406    /// Returns all variants for iteration.
2407    pub fn all_variants() -> &'static [CashFraudScheme] {
2408        &[
2409            CashFraudScheme::Larceny,
2410            CashFraudScheme::Skimming,
2411            CashFraudScheme::SalesSkimming,
2412            CashFraudScheme::ReceivablesSkimming,
2413            CashFraudScheme::RefundSchemes,
2414            CashFraudScheme::ShellCompany,
2415            CashFraudScheme::NonAccompliceVendor,
2416            CashFraudScheme::PersonalPurchases,
2417            CashFraudScheme::GhostEmployee,
2418            CashFraudScheme::FalsifiedWages,
2419            CashFraudScheme::CommissionSchemes,
2420            CashFraudScheme::MischaracterizedExpenses,
2421            CashFraudScheme::OverstatedExpenses,
2422            CashFraudScheme::FictitiousExpenses,
2423            CashFraudScheme::ForgedMaker,
2424            CashFraudScheme::ForgedEndorsement,
2425            CashFraudScheme::AlteredPayee,
2426            CashFraudScheme::AuthorizedMaker,
2427            CashFraudScheme::FalseVoids,
2428            CashFraudScheme::FalseRefunds,
2429        ]
2430    }
2431}
2432
/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
///
/// The first four variants are inventory schemes; the remaining four are
/// grouped as other-asset schemes (see `AssetFraudScheme::subcategory`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AssetFraudScheme {
    // ========== Inventory Schemes ==========
    /// Misusing or converting inventory for personal benefit.
    InventoryMisuse,
    /// Stealing physical inventory items.
    InventoryTheft,
    /// Manipulating purchasing to facilitate theft.
    InventoryPurchasingScheme,
    /// Manipulating receiving/shipping to steal inventory.
    InventoryReceivingScheme,

    // ========== Other Asset Schemes ==========
    /// Misusing company equipment or vehicles.
    EquipmentMisuse,
    /// Theft of company equipment, tools, or supplies.
    EquipmentTheft,
    /// Unauthorized access to or theft of intellectual property.
    IntellectualPropertyTheft,
    /// Using company time/resources for personal business.
    TimeTheft,
}
2456
2457impl AssetFraudScheme {
2458    /// Returns the ACFE category this scheme belongs to.
2459    pub fn category(&self) -> AcfeFraudCategory {
2460        AcfeFraudCategory::AssetMisappropriation
2461    }
2462
2463    /// Returns the subcategory within the ACFE Fraud Tree.
2464    pub fn subcategory(&self) -> &'static str {
2465        match self {
2466            AssetFraudScheme::InventoryMisuse
2467            | AssetFraudScheme::InventoryTheft
2468            | AssetFraudScheme::InventoryPurchasingScheme
2469            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2470            _ => "other_assets",
2471        }
2472    }
2473
2474    /// Returns the typical severity (1-5) for this scheme.
2475    pub fn severity(&self) -> u8 {
2476        match self {
2477            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2478            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2479            AssetFraudScheme::InventoryTheft
2480            | AssetFraudScheme::InventoryPurchasingScheme
2481            | AssetFraudScheme::InventoryReceivingScheme => 4,
2482            AssetFraudScheme::IntellectualPropertyTheft => 5,
2483        }
2484    }
2485}
2486
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// Variants are grouped below by the four subcategories that
/// [`CorruptionScheme::subcategory`] reports: conflicts of interest,
/// bribery, illegal gratuities and economic extortion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2521
2522impl CorruptionScheme {
2523    /// Returns the ACFE category this scheme belongs to.
2524    pub fn category(&self) -> AcfeFraudCategory {
2525        AcfeFraudCategory::Corruption
2526    }
2527
2528    /// Returns the subcategory within the ACFE Fraud Tree.
2529    pub fn subcategory(&self) -> &'static str {
2530        match self {
2531            CorruptionScheme::PurchasingConflict
2532            | CorruptionScheme::SalesConflict
2533            | CorruptionScheme::OutsideBusinessInterest
2534            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2535            CorruptionScheme::InvoiceKickback
2536            | CorruptionScheme::BidRigging
2537            | CorruptionScheme::CashBribery
2538            | CorruptionScheme::PublicOfficial => "bribery",
2539            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2540            CorruptionScheme::EconomicExtortion => "economic_extortion",
2541        }
2542    }
2543
2544    /// Returns the typical severity (1-5) for this scheme.
2545    pub fn severity(&self) -> u8 {
2546        match self {
2547            // Lower severity conflicts of interest
2548            CorruptionScheme::NepotismConflict => 3,
2549            // Medium severity
2550            CorruptionScheme::PurchasingConflict
2551            | CorruptionScheme::SalesConflict
2552            | CorruptionScheme::OutsideBusinessInterest
2553            | CorruptionScheme::IllegalGratuity => 4,
2554            // High severity - active corruption
2555            CorruptionScheme::InvoiceKickback
2556            | CorruptionScheme::BidRigging
2557            | CorruptionScheme::CashBribery
2558            | CorruptionScheme::EconomicExtortion => 5,
2559            // Highest severity - involves public officials
2560            CorruptionScheme::PublicOfficial => 5,
2561        }
2562    }
2563
2564    /// Returns the typical detection difficulty.
2565    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2566        match self {
2567            // Easier to detect with proper disclosure requirements
2568            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2569                AnomalyDetectionDifficulty::Moderate
2570            }
2571            // Hard - requires transaction pattern analysis
2572            CorruptionScheme::PurchasingConflict
2573            | CorruptionScheme::SalesConflict
2574            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2575            // Expert level - deliberate concealment
2576            CorruptionScheme::InvoiceKickback
2577            | CorruptionScheme::CashBribery
2578            | CorruptionScheme::PublicOfficial
2579            | CorruptionScheme::IllegalGratuity
2580            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2581        }
2582    }
2583
2584    /// Returns all variants for iteration.
2585    pub fn all_variants() -> &'static [CorruptionScheme] {
2586        &[
2587            CorruptionScheme::PurchasingConflict,
2588            CorruptionScheme::SalesConflict,
2589            CorruptionScheme::OutsideBusinessInterest,
2590            CorruptionScheme::NepotismConflict,
2591            CorruptionScheme::InvoiceKickback,
2592            CorruptionScheme::BidRigging,
2593            CorruptionScheme::CashBribery,
2594            CorruptionScheme::PublicOfficial,
2595            CorruptionScheme::IllegalGratuity,
2596            CorruptionScheme::EconomicExtortion,
2597        ]
2598    }
2599}
2600
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// The last four variants are reported as `"understatement"` by
/// [`FinancialStatementScheme::subcategory`]; all others as `"overstatement"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2637
2638impl FinancialStatementScheme {
2639    /// Returns the ACFE category this scheme belongs to.
2640    pub fn category(&self) -> AcfeFraudCategory {
2641        AcfeFraudCategory::FinancialStatementFraud
2642    }
2643
2644    /// Returns the subcategory within the ACFE Fraud Tree.
2645    pub fn subcategory(&self) -> &'static str {
2646        match self {
2647            FinancialStatementScheme::UnderstatedRevenues
2648            | FinancialStatementScheme::OverstatedExpenses
2649            | FinancialStatementScheme::OverstatedLiabilities
2650            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2651            _ => "overstatement",
2652        }
2653    }
2654
2655    /// Returns the typical severity (1-5) for this scheme.
2656    pub fn severity(&self) -> u8 {
2657        // All financial statement fraud is high severity
2658        5
2659    }
2660
2661    /// Returns the typical detection difficulty.
2662    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2663        match self {
2664            // Easier to detect with good analytics
2665            FinancialStatementScheme::ChannelStuffing
2666            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2667            // Hard - requires deep analysis
2668            FinancialStatementScheme::PrematureRevenue
2669            | FinancialStatementScheme::ImproperCapitalization
2670            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2671            // Expert level
2672            FinancialStatementScheme::FictitiousRevenues
2673            | FinancialStatementScheme::ConcealedLiabilities
2674            | FinancialStatementScheme::ImproperAssetValuations
2675            | FinancialStatementScheme::ImproperDisclosures
2676            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2677            _ => AnomalyDetectionDifficulty::Hard,
2678        }
2679    }
2680
2681    /// Returns all variants for iteration.
2682    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2683        &[
2684            FinancialStatementScheme::PrematureRevenue,
2685            FinancialStatementScheme::DelayedExpenses,
2686            FinancialStatementScheme::FictitiousRevenues,
2687            FinancialStatementScheme::ConcealedLiabilities,
2688            FinancialStatementScheme::ImproperAssetValuations,
2689            FinancialStatementScheme::ImproperDisclosures,
2690            FinancialStatementScheme::ChannelStuffing,
2691            FinancialStatementScheme::BillAndHold,
2692            FinancialStatementScheme::ImproperCapitalization,
2693            FinancialStatementScheme::UnderstatedRevenues,
2694            FinancialStatementScheme::OverstatedExpenses,
2695            FinancialStatementScheme::OverstatedLiabilities,
2696            FinancialStatementScheme::ImproperAssetWritedowns,
2697        ]
2698    }
2699}
2700
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the per-branch scheme enums; the methods on
/// `AcfeScheme` dispatch on the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2713
2714impl AcfeScheme {
2715    /// Returns the ACFE category this scheme belongs to.
2716    pub fn category(&self) -> AcfeFraudCategory {
2717        match self {
2718            AcfeScheme::Cash(s) => s.category(),
2719            AcfeScheme::Asset(s) => s.category(),
2720            AcfeScheme::Corruption(s) => s.category(),
2721            AcfeScheme::FinancialStatement(s) => s.category(),
2722        }
2723    }
2724
2725    /// Returns the severity (1-5) for this scheme.
2726    pub fn severity(&self) -> u8 {
2727        match self {
2728            AcfeScheme::Cash(s) => s.severity(),
2729            AcfeScheme::Asset(s) => s.severity(),
2730            AcfeScheme::Corruption(s) => s.severity(),
2731            AcfeScheme::FinancialStatement(s) => s.severity(),
2732        }
2733    }
2734
2735    /// Returns the detection difficulty for this scheme.
2736    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2737        match self {
2738            AcfeScheme::Cash(s) => s.detection_difficulty(),
2739            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2740            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2741            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2742        }
2743    }
2744}
2745
/// How a fraud was detected (from ACFE statistics).
///
/// The share of cases attributed to each method is available from
/// [`AcfeDetectionMethod::typical_detection_rate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2774
2775impl AcfeDetectionMethod {
2776    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2777    pub fn typical_detection_rate(&self) -> f64 {
2778        match self {
2779            AcfeDetectionMethod::Tip => 0.42,
2780            AcfeDetectionMethod::InternalAudit => 0.16,
2781            AcfeDetectionMethod::ManagementReview => 0.12,
2782            AcfeDetectionMethod::ExternalAudit => 0.04,
2783            AcfeDetectionMethod::AccountReconciliation => 0.05,
2784            AcfeDetectionMethod::DocumentExamination => 0.04,
2785            AcfeDetectionMethod::ByAccident => 0.06,
2786            AcfeDetectionMethod::ItControls => 0.03,
2787            AcfeDetectionMethod::Surveillance => 0.02,
2788            AcfeDetectionMethod::Confession => 0.02,
2789            AcfeDetectionMethod::LawEnforcement => 0.01,
2790            AcfeDetectionMethod::Other => 0.03,
2791        }
2792    }
2793
2794    /// Returns all variants for iteration.
2795    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2796        &[
2797            AcfeDetectionMethod::Tip,
2798            AcfeDetectionMethod::InternalAudit,
2799            AcfeDetectionMethod::ManagementReview,
2800            AcfeDetectionMethod::ExternalAudit,
2801            AcfeDetectionMethod::AccountReconciliation,
2802            AcfeDetectionMethod::DocumentExamination,
2803            AcfeDetectionMethod::ByAccident,
2804            AcfeDetectionMethod::ItControls,
2805            AcfeDetectionMethod::Surveillance,
2806            AcfeDetectionMethod::Confession,
2807            AcfeDetectionMethod::LawEnforcement,
2808            AcfeDetectionMethod::Other,
2809        ]
2810    }
2811}
2812
/// Department/position of perpetrator (from ACFE statistics).
///
/// Per-department occurrence rates and median losses are available from
/// [`PerpetratorDepartment::typical_occurrence_rate`] and
/// [`PerpetratorDepartment::typical_median_loss`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
2841
2842impl PerpetratorDepartment {
2843    /// Returns the typical percentage of frauds by department (from ACFE reports).
2844    pub fn typical_occurrence_rate(&self) -> f64 {
2845        match self {
2846            PerpetratorDepartment::Accounting => 0.21,
2847            PerpetratorDepartment::Operations => 0.17,
2848            PerpetratorDepartment::Executive => 0.12,
2849            PerpetratorDepartment::Sales => 0.11,
2850            PerpetratorDepartment::CustomerService => 0.07,
2851            PerpetratorDepartment::Purchasing => 0.06,
2852            PerpetratorDepartment::It => 0.05,
2853            PerpetratorDepartment::HumanResources => 0.04,
2854            PerpetratorDepartment::Administrative => 0.04,
2855            PerpetratorDepartment::Warehouse => 0.03,
2856            PerpetratorDepartment::BoardOfDirectors => 0.02,
2857            PerpetratorDepartment::Other => 0.08,
2858        }
2859    }
2860
2861    /// Returns the typical median loss by perpetrator department.
2862    pub fn typical_median_loss(&self) -> Decimal {
2863        match self {
2864            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2865            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2866            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2867            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2868            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2869            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2870            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2871            _ => Decimal::new(80_000, 0),
2872        }
2873    }
2874}
2875
/// Perpetrator position level (from ACFE statistics).
///
/// Per-level occurrence rates and median losses are available from
/// [`PerpetratorLevel::typical_occurrence_rate`] and
/// [`PerpetratorLevel::typical_median_loss`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
2886
2887impl PerpetratorLevel {
2888    /// Returns the typical percentage of frauds by position level.
2889    pub fn typical_occurrence_rate(&self) -> f64 {
2890        match self {
2891            PerpetratorLevel::Employee => 0.42,
2892            PerpetratorLevel::Manager => 0.36,
2893            PerpetratorLevel::OwnerExecutive => 0.22,
2894        }
2895    }
2896
2897    /// Returns the typical median loss by position level.
2898    pub fn typical_median_loss(&self) -> Decimal {
2899        match self {
2900            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2901            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2902            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2903        }
2904    }
2905}
2906
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// All distribution maps are keyed by plain strings. The `Default`
/// implementation fills them with fractional weights (0.0-1.0).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    ///
    /// Default keys: `asset_misappropriation`, `corruption`,
    /// `financial_statement_fraud`. The default weights add up to more than
    /// 1.0. NOTE(review): presumably because one case can fall into several
    /// categories; confirm before normalising.
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    ///
    /// Default keys are the lower-cased `Debug` names of
    /// [`AcfeDetectionMethod`] variants, without separators
    /// (e.g. `internalaudit`).
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    ///
    /// Default keys are snake_case department names plus a combined `other`
    /// bucket.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    ///
    /// Default keys: `employee`, `manager`, `owner_executive`.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Percentage of frauds involving collusion.
    ///
    /// Stored as a fraction: `validate` requires a value between 0.0 and 1.0.
    pub collusion_rate: f64,
}
2930
2931impl Default for AcfeCalibration {
2932    fn default() -> Self {
2933        let mut category_distribution = HashMap::new();
2934        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2935        category_distribution.insert("corruption".to_string(), 0.33);
2936        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2937
2938        let mut detection_method_distribution = HashMap::new();
2939        for method in AcfeDetectionMethod::all_variants() {
2940            detection_method_distribution.insert(
2941                format!("{method:?}").to_lowercase(),
2942                method.typical_detection_rate(),
2943            );
2944        }
2945
2946        let mut department_distribution = HashMap::new();
2947        department_distribution.insert("accounting".to_string(), 0.21);
2948        department_distribution.insert("operations".to_string(), 0.17);
2949        department_distribution.insert("executive".to_string(), 0.12);
2950        department_distribution.insert("sales".to_string(), 0.11);
2951        department_distribution.insert("customer_service".to_string(), 0.07);
2952        department_distribution.insert("purchasing".to_string(), 0.06);
2953        department_distribution.insert("other".to_string(), 0.26);
2954
2955        let mut level_distribution = HashMap::new();
2956        level_distribution.insert("employee".to_string(), 0.42);
2957        level_distribution.insert("manager".to_string(), 0.36);
2958        level_distribution.insert("owner_executive".to_string(), 0.22);
2959
2960        Self {
2961            median_loss: Decimal::new(117_000, 0),
2962            median_duration_months: 12,
2963            category_distribution,
2964            detection_method_distribution,
2965            department_distribution,
2966            level_distribution,
2967            avg_red_flags_per_case: 2.8,
2968            collusion_rate: 0.50,
2969        }
2970    }
2971}
2972
2973impl AcfeCalibration {
2974    /// Creates a new ACFE calibration with the given parameters.
2975    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2976        Self {
2977            median_loss,
2978            median_duration_months,
2979            ..Self::default()
2980        }
2981    }
2982
2983    /// Returns the median loss for a specific category.
2984    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2985        category.typical_median_loss()
2986    }
2987
2988    /// Returns the median duration for a specific category.
2989    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
2990        category.typical_detection_months()
2991    }
2992
2993    /// Validates the calibration data.
2994    pub fn validate(&self) -> Result<(), String> {
2995        if self.median_loss <= Decimal::ZERO {
2996            return Err("Median loss must be positive".to_string());
2997        }
2998        if self.median_duration_months == 0 {
2999            return Err("Median duration must be at least 1 month".to_string());
3000        }
3001        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3002            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3003        }
3004        Ok(())
3005    }
3006}
3007
/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
///
/// The fraud triangle is a model for explaining the factors that cause
/// someone to commit occupational fraud.
///
/// Holds exactly one pressure and one rationalization but any number of
/// opportunity factors.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudTriangle {
    /// Pressure or incentive to commit fraud.
    pub pressure: PressureType,
    /// Opportunity factors that enable fraud.
    ///
    /// May be empty; [`FraudTriangle::risk_score`] then counts the
    /// opportunity component as zero.
    pub opportunities: Vec<OpportunityFactor>,
    /// Rationalization used to justify the fraud.
    pub rationalization: Rationalization,
}
3021
3022impl FraudTriangle {
3023    /// Creates a new fraud triangle.
3024    pub fn new(
3025        pressure: PressureType,
3026        opportunities: Vec<OpportunityFactor>,
3027        rationalization: Rationalization,
3028    ) -> Self {
3029        Self {
3030            pressure,
3031            opportunities,
3032            rationalization,
3033        }
3034    }
3035
3036    /// Returns a risk score based on the fraud triangle components.
3037    pub fn risk_score(&self) -> f64 {
3038        let pressure_score = self.pressure.risk_weight();
3039        let opportunity_score: f64 = self
3040            .opportunities
3041            .iter()
3042            .map(OpportunityFactor::risk_weight)
3043            .sum::<f64>()
3044            / self.opportunities.len().max(1) as f64;
3045        let rationalization_score = self.rationalization.risk_weight();
3046
3047        (pressure_score + opportunity_score + rationalization_score) / 3.0
3048    }
3049}
3050
/// Types of pressure/incentive that can lead to fraud.
///
/// Each variant carries a risk weight, available from
/// [`PressureType::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PressureType {
    // Financial Pressures
    /// Personal financial difficulties (debt, lifestyle beyond means).
    PersonalFinancialDifficulties,
    /// Pressure to meet financial targets/earnings expectations.
    FinancialTargets,
    /// Market or analyst expectations.
    MarketExpectations,
    /// Debt covenant compliance requirements.
    CovenantCompliance,
    /// Credit rating maintenance.
    CreditRatingMaintenance,
    /// Acquisition/merger valuation pressure.
    AcquisitionValuation,

    // Non-Financial Pressures
    /// Fear of job loss.
    JobSecurity,
    /// Pressure to maintain status or image.
    StatusMaintenance,
    /// Gambling addiction.
    GamblingAddiction,
    /// Substance abuse issues.
    SubstanceAbuse,
    /// Family pressure or obligations.
    FamilyPressure,
    /// Greed or desire for more.
    Greed,
}
3082
3083impl PressureType {
3084    /// Returns the risk weight (0.0-1.0) for this pressure type.
3085    pub fn risk_weight(&self) -> f64 {
3086        match self {
3087            PressureType::PersonalFinancialDifficulties => 0.80,
3088            PressureType::FinancialTargets => 0.75,
3089            PressureType::MarketExpectations => 0.70,
3090            PressureType::CovenantCompliance => 0.85,
3091            PressureType::CreditRatingMaintenance => 0.70,
3092            PressureType::AcquisitionValuation => 0.75,
3093            PressureType::JobSecurity => 0.65,
3094            PressureType::StatusMaintenance => 0.55,
3095            PressureType::GamblingAddiction => 0.90,
3096            PressureType::SubstanceAbuse => 0.85,
3097            PressureType::FamilyPressure => 0.60,
3098            PressureType::Greed => 0.70,
3099        }
3100    }
3101}
3102
/// Opportunity factors that enable fraud.
///
/// Each variant carries a risk weight, available from
/// [`OpportunityFactor::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OpportunityFactor {
    /// Weak internal controls.
    WeakInternalControls,
    /// Lack of segregation of duties.
    LackOfSegregation,
    /// Override capability.
    ManagementOverride,
    /// Complex or unusual transactions.
    ComplexTransactions,
    /// Related party transactions.
    RelatedPartyTransactions,
    /// Poor tone at the top.
    PoorToneAtTop,
    /// Inadequate supervision.
    InadequateSupervision,
    /// Access to assets without accountability.
    AssetAccess,
    /// Inadequate record keeping.
    PoorRecordKeeping,
    /// Failure to discipline fraud perpetrators.
    LackOfDiscipline,
    /// Lack of independent checks.
    LackOfIndependentChecks,
}
3129
3130impl OpportunityFactor {
3131    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3132    pub fn risk_weight(&self) -> f64 {
3133        match self {
3134            OpportunityFactor::WeakInternalControls => 0.85,
3135            OpportunityFactor::LackOfSegregation => 0.80,
3136            OpportunityFactor::ManagementOverride => 0.90,
3137            OpportunityFactor::ComplexTransactions => 0.70,
3138            OpportunityFactor::RelatedPartyTransactions => 0.75,
3139            OpportunityFactor::PoorToneAtTop => 0.85,
3140            OpportunityFactor::InadequateSupervision => 0.75,
3141            OpportunityFactor::AssetAccess => 0.70,
3142            OpportunityFactor::PoorRecordKeeping => 0.65,
3143            OpportunityFactor::LackOfDiscipline => 0.60,
3144            OpportunityFactor::LackOfIndependentChecks => 0.75,
3145        }
3146    }
3147}
3148
/// Rationalizations used by fraud perpetrators.
///
/// Each variant is documented with the excuse it stands for and carries a
/// risk weight, available from [`Rationalization::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rationalization {
    /// "I'm just borrowing; I'll pay it back."
    TemporaryBorrowing,
    /// "Everyone does it."
    EveryoneDoesIt,
    /// "It's for the good of the company."
    ForTheCompanyGood,
    /// "I deserve this; the company owes me."
    Entitlement,
    /// "I was just following orders."
    FollowingOrders,
    /// "They won't miss it; they have plenty."
    TheyWontMissIt,
    /// "I need it more than they do."
    NeedItMore,
    /// "It's not really stealing."
    NotReallyStealing,
    /// "I'm underpaid for what I do."
    Underpaid,
    /// "It's a victimless crime."
    VictimlessCrime,
}
3173
3174impl Rationalization {
3175    /// Returns the risk weight (0.0-1.0) for this rationalization.
3176    pub fn risk_weight(&self) -> f64 {
3177        match self {
3178            // More dangerous rationalizations
3179            Rationalization::Entitlement => 0.85,
3180            Rationalization::EveryoneDoesIt => 0.80,
3181            Rationalization::NotReallyStealing => 0.80,
3182            Rationalization::TheyWontMissIt => 0.75,
3183            // Medium risk
3184            Rationalization::Underpaid => 0.70,
3185            Rationalization::ForTheCompanyGood => 0.65,
3186            Rationalization::NeedItMore => 0.65,
3187            // Lower risk (still indicates fraud)
3188            Rationalization::TemporaryBorrowing => 0.60,
3189            Rationalization::FollowingOrders => 0.55,
3190            Rationalization::VictimlessCrime => 0.60,
3191        }
3192    }
3193}
3194
3195// ============================================================================
3196// NEAR-MISS TYPES
3197// ============================================================================
3198
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Every variant carries the details that make the transaction look
/// suspicious. Because some variants own `String` data, this enum is `Clone`
/// but not `Copy`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Date difference from similar transaction.
        date_difference_days: u32,
        /// Original transaction ID.
        similar_transaction_id: String,
    },
    /// Amount just below approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Percentage of threshold (0.0-1.0).
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Type of legitimate pattern.
        pattern_type: LegitimatePatternType,
        /// Business justification.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Days until correction.
        correction_lag_days: u32,
        /// Correction document ID.
        correction_document_id: String,
    },
}
3231
/// Types of unusual but legitimate business patterns.
///
/// Used as the `pattern_type` of [`NearMissPattern::UnusualLegitimate`]. A
/// short human-readable label for each variant is available from
/// [`LegitimatePatternType::description`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3252
3253impl LegitimatePatternType {
3254    /// Returns a description of this pattern type.
3255    pub fn description(&self) -> &'static str {
3256        match self {
3257            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3258            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3259            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3260            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3261            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3262            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3263            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3264            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3265        }
3266    }
3267}
3268
3269/// What might trigger a false positive for this near-miss.
3270#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3271pub enum FalsePositiveTrigger {
3272    /// Amount is near threshold.
3273    AmountNearThreshold,
3274    /// Timing is unusual.
3275    UnusualTiming,
3276    /// Similar to existing transaction.
3277    SimilarTransaction,
3278    /// New counterparty.
3279    NewCounterparty,
3280    /// Account combination unusual.
3281    UnusualAccountCombination,
3282    /// Volume spike.
3283    VolumeSpike,
3284    /// Round amount.
3285    RoundAmount,
3286}
3287
3288/// Label for a near-miss case.
3289#[derive(Debug, Clone, Serialize, Deserialize)]
3290pub struct NearMissLabel {
3291    /// Document ID.
3292    pub document_id: String,
3293    /// The near-miss pattern.
3294    pub pattern: NearMissPattern,
3295    /// How suspicious it appears (0.0-1.0).
3296    pub suspicion_score: f64,
3297    /// What would trigger a false positive.
3298    pub false_positive_trigger: FalsePositiveTrigger,
3299    /// Why this is actually legitimate.
3300    pub explanation: String,
3301}
3302
3303impl NearMissLabel {
3304    /// Creates a new near-miss label.
3305    pub fn new(
3306        document_id: impl Into<String>,
3307        pattern: NearMissPattern,
3308        suspicion_score: f64,
3309        trigger: FalsePositiveTrigger,
3310        explanation: impl Into<String>,
3311    ) -> Self {
3312        Self {
3313            document_id: document_id.into(),
3314            pattern,
3315            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3316            false_positive_trigger: trigger,
3317            explanation: explanation.into(),
3318        }
3319    }
3320}
3321
3322/// Configuration for anomaly rates.
3323#[derive(Debug, Clone, Serialize, Deserialize)]
3324pub struct AnomalyRateConfig {
3325    /// Overall anomaly rate (0.0 - 1.0).
3326    pub total_rate: f64,
3327    /// Fraud rate as proportion of anomalies.
3328    pub fraud_rate: f64,
3329    /// Error rate as proportion of anomalies.
3330    pub error_rate: f64,
3331    /// Process issue rate as proportion of anomalies.
3332    pub process_issue_rate: f64,
3333    /// Statistical anomaly rate as proportion of anomalies.
3334    pub statistical_rate: f64,
3335    /// Relational anomaly rate as proportion of anomalies.
3336    pub relational_rate: f64,
3337}
3338
3339impl Default for AnomalyRateConfig {
3340    fn default() -> Self {
3341        Self {
3342            total_rate: 0.02,         // 2% of transactions are anomalous
3343            fraud_rate: 0.25,         // 25% of anomalies are fraud
3344            error_rate: 0.35,         // 35% of anomalies are errors
3345            process_issue_rate: 0.20, // 20% are process issues
3346            statistical_rate: 0.15,   // 15% are statistical
3347            relational_rate: 0.05,    // 5% are relational
3348        }
3349    }
3350}
3351
3352impl AnomalyRateConfig {
3353    /// Validates that rates sum to approximately 1.0.
3354    pub fn validate(&self) -> Result<(), String> {
3355        let sum = self.fraud_rate
3356            + self.error_rate
3357            + self.process_issue_rate
3358            + self.statistical_rate
3359            + self.relational_rate;
3360
3361        if (sum - 1.0).abs() > 0.01 {
3362            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3363        }
3364
3365        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3366            return Err(format!(
3367                "Total rate must be between 0.0 and 1.0, got {}",
3368                self.total_rate
3369            ));
3370        }
3371
3372        Ok(())
3373    }
3374}
3375
3376#[cfg(test)]
3377#[allow(clippy::unwrap_used)]
3378mod tests {
3379    use super::*;
3380    use rust_decimal_macros::dec;
3381
3382    #[test]
3383    fn test_anomaly_type_category() {
3384        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3385        assert_eq!(fraud.category(), "Fraud");
3386        assert!(fraud.is_intentional());
3387
3388        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3389        assert_eq!(error.category(), "Error");
3390        assert!(!error.is_intentional());
3391    }
3392
3393    #[test]
3394    fn test_labeled_anomaly() {
3395        let anomaly = LabeledAnomaly::new(
3396            "ANO001".to_string(),
3397            AnomalyType::Fraud(FraudType::SelfApproval),
3398            "JE001".to_string(),
3399            "JE".to_string(),
3400            "1000".to_string(),
3401            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3402        )
3403        .with_description("User approved their own expense report")
3404        .with_related_entity("USER001");
3405
3406        assert_eq!(anomaly.severity, 3);
3407        assert!(anomaly.is_injected);
3408        assert_eq!(anomaly.related_entities.len(), 1);
3409    }
3410
3411    #[test]
3412    fn test_labeled_anomaly_with_provenance() {
3413        let anomaly = LabeledAnomaly::new(
3414            "ANO001".to_string(),
3415            AnomalyType::Fraud(FraudType::SelfApproval),
3416            "JE001".to_string(),
3417            "JE".to_string(),
3418            "1000".to_string(),
3419            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3420        )
3421        .with_run_id("run-123")
3422        .with_generation_seed(42)
3423        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3424        .with_structured_strategy(InjectionStrategy::SelfApproval {
3425            user_id: "USER001".to_string(),
3426        })
3427        .with_scenario("scenario-001")
3428        .with_original_document_hash("abc123");
3429
3430        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3431        assert_eq!(anomaly.generation_seed, Some(42));
3432        assert!(anomaly.causal_reason.is_some());
3433        assert!(anomaly.structured_strategy.is_some());
3434        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3435        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3436
3437        // Check that legacy injection_strategy is also set
3438        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3439    }
3440
3441    #[test]
3442    fn test_labeled_anomaly_derivation_chain() {
3443        let parent = LabeledAnomaly::new(
3444            "ANO001".to_string(),
3445            AnomalyType::Fraud(FraudType::DuplicatePayment),
3446            "JE001".to_string(),
3447            "JE".to_string(),
3448            "1000".to_string(),
3449            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3450        );
3451
3452        let child = LabeledAnomaly::new(
3453            "ANO002".to_string(),
3454            AnomalyType::Error(ErrorType::DuplicateEntry),
3455            "JE002".to_string(),
3456            "JE".to_string(),
3457            "1000".to_string(),
3458            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3459        )
3460        .with_parent_anomaly(&parent.anomaly_id);
3461
3462        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3463    }
3464
3465    #[test]
3466    fn test_injection_strategy_description() {
3467        let strategy = InjectionStrategy::AmountManipulation {
3468            original: dec!(1000),
3469            factor: 2.5,
3470        };
3471        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3472        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3473
3474        let strategy = InjectionStrategy::ThresholdAvoidance {
3475            threshold: dec!(10000),
3476            adjusted_amount: dec!(9999),
3477        };
3478        assert_eq!(
3479            strategy.description(),
3480            "Amount adjusted to avoid 10000 threshold"
3481        );
3482
3483        let strategy = InjectionStrategy::DateShift {
3484            days_shifted: -5,
3485            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3486        };
3487        assert_eq!(strategy.description(), "Date backdated by 5 days");
3488
3489        let strategy = InjectionStrategy::DateShift {
3490            days_shifted: 3,
3491            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3492        };
3493        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3494    }
3495
3496    #[test]
3497    fn test_causal_reason_variants() {
3498        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3499        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3500            assert!((base_rate - 0.02).abs() < 0.001);
3501        }
3502
3503        let reason = AnomalyCausalReason::TemporalPattern {
3504            pattern_name: "year_end_spike".to_string(),
3505        };
3506        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3507            assert_eq!(pattern_name, "year_end_spike");
3508        }
3509
3510        let reason = AnomalyCausalReason::ScenarioStep {
3511            scenario_type: "kickback".to_string(),
3512            step_number: 3,
3513        };
3514        if let AnomalyCausalReason::ScenarioStep {
3515            scenario_type,
3516            step_number,
3517        } = reason
3518        {
3519            assert_eq!(scenario_type, "kickback");
3520            assert_eq!(step_number, 3);
3521        }
3522    }
3523
3524    #[test]
3525    fn test_feature_vector_length() {
3526        let anomaly = LabeledAnomaly::new(
3527            "ANO001".to_string(),
3528            AnomalyType::Fraud(FraudType::SelfApproval),
3529            "JE001".to_string(),
3530            "JE".to_string(),
3531            "1000".to_string(),
3532            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3533        );
3534
3535        let features = anomaly.to_features();
3536        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3537        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3538    }
3539
3540    #[test]
3541    fn test_feature_vector_with_provenance() {
3542        let anomaly = LabeledAnomaly::new(
3543            "ANO001".to_string(),
3544            AnomalyType::Fraud(FraudType::SelfApproval),
3545            "JE001".to_string(),
3546            "JE".to_string(),
3547            "1000".to_string(),
3548            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3549        )
3550        .with_scenario("scenario-001")
3551        .with_parent_anomaly("ANO000");
3552
3553        let features = anomaly.to_features();
3554
3555        // Last two features should be 1.0 (has scenario, has parent)
3556        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3557        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3558    }
3559
3560    #[test]
3561    fn test_anomaly_summary() {
3562        let anomalies = vec![
3563            LabeledAnomaly::new(
3564                "ANO001".to_string(),
3565                AnomalyType::Fraud(FraudType::SelfApproval),
3566                "JE001".to_string(),
3567                "JE".to_string(),
3568                "1000".to_string(),
3569                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3570            ),
3571            LabeledAnomaly::new(
3572                "ANO002".to_string(),
3573                AnomalyType::Error(ErrorType::DuplicateEntry),
3574                "JE002".to_string(),
3575                "JE".to_string(),
3576                "1000".to_string(),
3577                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3578            ),
3579        ];
3580
3581        let summary = AnomalySummary::from_anomalies(&anomalies);
3582
3583        assert_eq!(summary.total_count, 2);
3584        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3585        assert_eq!(summary.by_category.get("Error"), Some(&1));
3586    }
3587
3588    #[test]
3589    fn test_rate_config_validation() {
3590        let config = AnomalyRateConfig::default();
3591        assert!(config.validate().is_ok());
3592
3593        let bad_config = AnomalyRateConfig {
3594            fraud_rate: 0.5,
3595            error_rate: 0.5,
3596            process_issue_rate: 0.5, // Sum > 1.0
3597            ..Default::default()
3598        };
3599        assert!(bad_config.validate().is_err());
3600    }
3601
3602    #[test]
3603    fn test_injection_strategy_serialization() {
3604        let strategy = InjectionStrategy::SoDViolation {
3605            duty1: "CreatePO".to_string(),
3606            duty2: "ApprovePO".to_string(),
3607            violating_user: "USER001".to_string(),
3608        };
3609
3610        let json = serde_json::to_string(&strategy).unwrap();
3611        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3612
3613        assert_eq!(strategy, deserialized);
3614    }
3615
3616    #[test]
3617    fn test_labeled_anomaly_serialization_with_provenance() {
3618        let anomaly = LabeledAnomaly::new(
3619            "ANO001".to_string(),
3620            AnomalyType::Fraud(FraudType::SelfApproval),
3621            "JE001".to_string(),
3622            "JE".to_string(),
3623            "1000".to_string(),
3624            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3625        )
3626        .with_run_id("run-123")
3627        .with_generation_seed(42)
3628        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3629
3630        let json = serde_json::to_string(&anomaly).unwrap();
3631        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3632
3633        assert_eq!(anomaly.run_id, deserialized.run_id);
3634        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3635    }
3636
3637    // ========================================
3638    // FR-003 ENHANCED TAXONOMY TESTS
3639    // ========================================
3640
3641    #[test]
3642    fn test_anomaly_category_from_anomaly_type() {
3643        // Fraud mappings
3644        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3645        assert_eq!(
3646            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3647            AnomalyCategory::FictitiousVendor
3648        );
3649
3650        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3651        assert_eq!(
3652            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3653            AnomalyCategory::VendorKickback
3654        );
3655
3656        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3657        assert_eq!(
3658            AnomalyCategory::from_anomaly_type(&fraud_structured),
3659            AnomalyCategory::StructuredTransaction
3660        );
3661
3662        // Error mappings
3663        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3664        assert_eq!(
3665            AnomalyCategory::from_anomaly_type(&error_duplicate),
3666            AnomalyCategory::DuplicatePayment
3667        );
3668
3669        // Process issue mappings
3670        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3671        assert_eq!(
3672            AnomalyCategory::from_anomaly_type(&process_skip),
3673            AnomalyCategory::MissingApproval
3674        );
3675
3676        // Relational mappings
3677        let relational_circular =
3678            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3679        assert_eq!(
3680            AnomalyCategory::from_anomaly_type(&relational_circular),
3681            AnomalyCategory::CircularFlow
3682        );
3683    }
3684
3685    #[test]
3686    fn test_anomaly_category_ordinal() {
3687        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3688        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3689        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3690    }
3691
3692    #[test]
3693    fn test_contributing_factor() {
3694        let factor = ContributingFactor::new(
3695            FactorType::AmountDeviation,
3696            15000.0,
3697            10000.0,
3698            true,
3699            0.5,
3700            "Amount exceeds threshold",
3701        );
3702
3703        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3704        assert_eq!(factor.value, 15000.0);
3705        assert_eq!(factor.threshold, 10000.0);
3706        assert!(factor.direction_greater);
3707
3708        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3709        let contribution = factor.contribution();
3710        assert!((contribution - 0.25).abs() < 0.01);
3711    }
3712
3713    #[test]
3714    fn test_contributing_factor_with_evidence() {
3715        let mut data = HashMap::new();
3716        data.insert("expected".to_string(), "10000".to_string());
3717        data.insert("actual".to_string(), "15000".to_string());
3718
3719        let factor = ContributingFactor::new(
3720            FactorType::AmountDeviation,
3721            15000.0,
3722            10000.0,
3723            true,
3724            0.5,
3725            "Amount deviation detected",
3726        )
3727        .with_evidence("transaction_history", data);
3728
3729        assert!(factor.evidence.is_some());
3730        let evidence = factor.evidence.unwrap();
3731        assert_eq!(evidence.source, "transaction_history");
3732        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3733    }
3734
3735    #[test]
3736    fn test_enhanced_anomaly_label() {
3737        let base = LabeledAnomaly::new(
3738            "ANO001".to_string(),
3739            AnomalyType::Fraud(FraudType::DuplicatePayment),
3740            "JE001".to_string(),
3741            "JE".to_string(),
3742            "1000".to_string(),
3743            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3744        );
3745
3746        let enhanced = EnhancedAnomalyLabel::from_base(base)
3747            .with_confidence(0.85)
3748            .with_severity(0.7)
3749            .with_factor(ContributingFactor::new(
3750                FactorType::DuplicateIndicator,
3751                1.0,
3752                0.5,
3753                true,
3754                0.4,
3755                "Duplicate payment detected",
3756            ))
3757            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3758
3759        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3760        assert_eq!(enhanced.enhanced_confidence, 0.85);
3761        assert_eq!(enhanced.enhanced_severity, 0.7);
3762        assert_eq!(enhanced.contributing_factors.len(), 1);
3763        assert_eq!(enhanced.secondary_categories.len(), 1);
3764    }
3765
3766    #[test]
3767    fn test_enhanced_anomaly_label_features() {
3768        let base = LabeledAnomaly::new(
3769            "ANO001".to_string(),
3770            AnomalyType::Fraud(FraudType::SelfApproval),
3771            "JE001".to_string(),
3772            "JE".to_string(),
3773            "1000".to_string(),
3774            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3775        );
3776
3777        let enhanced = EnhancedAnomalyLabel::from_base(base)
3778            .with_confidence(0.9)
3779            .with_severity(0.8)
3780            .with_factor(ContributingFactor::new(
3781                FactorType::ControlBypass,
3782                1.0,
3783                0.0,
3784                true,
3785                0.5,
3786                "Control bypass detected",
3787            ));
3788
3789        let features = enhanced.to_features();
3790
3791        // Should have 25 features (15 base + 10 enhanced)
3792        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3793        assert_eq!(features.len(), 25);
3794
3795        // Check enhanced confidence is in features
3796        assert_eq!(features[15], 0.9); // enhanced_confidence
3797
3798        // Check has_control_bypass flag
3799        assert_eq!(features[21], 1.0); // has_control_bypass
3800    }
3801
3802    #[test]
3803    fn test_enhanced_anomaly_label_feature_names() {
3804        let names = EnhancedAnomalyLabel::feature_names();
3805        assert_eq!(names.len(), 25);
3806        assert!(names.contains(&"enhanced_confidence"));
3807        assert!(names.contains(&"enhanced_severity"));
3808        assert!(names.contains(&"has_control_bypass"));
3809    }
3810
3811    #[test]
3812    fn test_factor_type_names() {
3813        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3814        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3815        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3816    }
3817
3818    #[test]
3819    fn test_anomaly_category_serialization() {
3820        let category = AnomalyCategory::CircularFlow;
3821        let json = serde_json::to_string(&category).unwrap();
3822        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3823        assert_eq!(category, deserialized);
3824
3825        let custom = AnomalyCategory::Custom("custom_type".to_string());
3826        let json = serde_json::to_string(&custom).unwrap();
3827        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3828        assert_eq!(custom, deserialized);
3829    }
3830
3831    #[test]
3832    fn test_enhanced_label_secondary_category_dedup() {
3833        let base = LabeledAnomaly::new(
3834            "ANO001".to_string(),
3835            AnomalyType::Fraud(FraudType::DuplicatePayment),
3836            "JE001".to_string(),
3837            "JE".to_string(),
3838            "1000".to_string(),
3839            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3840        );
3841
3842        let enhanced = EnhancedAnomalyLabel::from_base(base)
3843            // Try to add the primary category as secondary (should be ignored)
3844            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3845            // Add a valid secondary
3846            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3847            // Try to add duplicate secondary (should be ignored)
3848            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3849
3850        // Should only have 1 secondary category (TimingAnomaly)
3851        assert_eq!(enhanced.secondary_categories.len(), 1);
3852        assert_eq!(
3853            enhanced.secondary_categories[0],
3854            AnomalyCategory::TimingAnomaly
3855        );
3856    }
3857
3858    // ==========================================================================
3859    // Accounting Standards Fraud Type Tests
3860    // ==========================================================================
3861
3862    #[test]
3863    fn test_revenue_recognition_fraud_types() {
3864        // Test ASC 606/IFRS 15 related fraud types
3865        let fraud_types = [
3866            FraudType::ImproperRevenueRecognition,
3867            FraudType::ImproperPoAllocation,
3868            FraudType::VariableConsiderationManipulation,
3869            FraudType::ContractModificationMisstatement,
3870        ];
3871
3872        for fraud_type in fraud_types {
3873            let anomaly_type = AnomalyType::Fraud(fraud_type);
3874            assert_eq!(anomaly_type.category(), "Fraud");
3875            assert!(anomaly_type.is_intentional());
3876            assert!(anomaly_type.severity() >= 3);
3877        }
3878    }
3879
3880    #[test]
3881    fn test_lease_accounting_fraud_types() {
3882        // Test ASC 842/IFRS 16 related fraud types
3883        let fraud_types = [
3884            FraudType::LeaseClassificationManipulation,
3885            FraudType::OffBalanceSheetLease,
3886            FraudType::LeaseLiabilityUnderstatement,
3887            FraudType::RouAssetMisstatement,
3888        ];
3889
3890        for fraud_type in fraud_types {
3891            let anomaly_type = AnomalyType::Fraud(fraud_type);
3892            assert_eq!(anomaly_type.category(), "Fraud");
3893            assert!(anomaly_type.is_intentional());
3894            assert!(anomaly_type.severity() >= 3);
3895        }
3896
3897        // Off-balance sheet lease fraud should be high severity
3898        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3899    }
3900
3901    #[test]
3902    fn test_fair_value_fraud_types() {
3903        // Test ASC 820/IFRS 13 related fraud types
3904        let fraud_types = [
3905            FraudType::FairValueHierarchyManipulation,
3906            FraudType::Level3InputManipulation,
3907            FraudType::ValuationTechniqueManipulation,
3908        ];
3909
3910        for fraud_type in fraud_types {
3911            let anomaly_type = AnomalyType::Fraud(fraud_type);
3912            assert_eq!(anomaly_type.category(), "Fraud");
3913            assert!(anomaly_type.is_intentional());
3914            assert!(anomaly_type.severity() >= 4);
3915        }
3916
3917        // Level 3 manipulation is highest severity (unobservable inputs)
3918        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3919    }
3920
3921    #[test]
3922    fn test_impairment_fraud_types() {
3923        // Test ASC 360/IAS 36 related fraud types
3924        let fraud_types = [
3925            FraudType::DelayedImpairment,
3926            FraudType::ImpairmentTestAvoidance,
3927            FraudType::CashFlowProjectionManipulation,
3928            FraudType::ImproperImpairmentReversal,
3929        ];
3930
3931        for fraud_type in fraud_types {
3932            let anomaly_type = AnomalyType::Fraud(fraud_type);
3933            assert_eq!(anomaly_type.category(), "Fraud");
3934            assert!(anomaly_type.is_intentional());
3935            assert!(anomaly_type.severity() >= 3);
3936        }
3937
3938        // Cash flow manipulation has highest severity
3939        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3940    }
3941
3942    // ==========================================================================
3943    // Accounting Standards Error Type Tests
3944    // ==========================================================================
3945
3946    #[test]
3947    fn test_standards_error_types() {
3948        // Test non-fraudulent accounting standards errors
3949        let error_types = [
3950            ErrorType::RevenueTimingError,
3951            ErrorType::PoAllocationError,
3952            ErrorType::LeaseClassificationError,
3953            ErrorType::LeaseCalculationError,
3954            ErrorType::FairValueError,
3955            ErrorType::ImpairmentCalculationError,
3956            ErrorType::DiscountRateError,
3957            ErrorType::FrameworkApplicationError,
3958        ];
3959
3960        for error_type in error_types {
3961            let anomaly_type = AnomalyType::Error(error_type);
3962            assert_eq!(anomaly_type.category(), "Error");
3963            assert!(!anomaly_type.is_intentional());
3964            assert!(anomaly_type.severity() >= 3);
3965        }
3966    }
3967
3968    #[test]
3969    fn test_framework_application_error() {
3970        // Test IFRS vs GAAP confusion errors
3971        let error_type = ErrorType::FrameworkApplicationError;
3972        assert_eq!(error_type.severity(), 4);
3973
3974        let anomaly = LabeledAnomaly::new(
3975            "ERR001".to_string(),
3976            AnomalyType::Error(error_type),
3977            "JE100".to_string(),
3978            "JE".to_string(),
3979            "1000".to_string(),
3980            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3981        )
3982        .with_description("LIFO inventory method used under IFRS (not permitted)")
3983        .with_metadata("framework", "IFRS")
3984        .with_metadata("standard_violated", "IAS 2");
3985
3986        assert_eq!(anomaly.anomaly_type.category(), "Error");
3987        assert_eq!(
3988            anomaly.metadata.get("standard_violated"),
3989            Some(&"IAS 2".to_string())
3990        );
3991    }
3992
3993    #[test]
3994    fn test_standards_anomaly_serialization() {
3995        // Test that new fraud types serialize/deserialize correctly
3996        let fraud_types = [
3997            FraudType::ImproperRevenueRecognition,
3998            FraudType::LeaseClassificationManipulation,
3999            FraudType::FairValueHierarchyManipulation,
4000            FraudType::DelayedImpairment,
4001        ];
4002
4003        for fraud_type in fraud_types {
4004            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4005            let deserialized: FraudType =
4006                serde_json::from_str(&json).expect("Failed to deserialize");
4007            assert_eq!(fraud_type, deserialized);
4008        }
4009
4010        // Test error types
4011        let error_types = [
4012            ErrorType::RevenueTimingError,
4013            ErrorType::LeaseCalculationError,
4014            ErrorType::FairValueError,
4015            ErrorType::FrameworkApplicationError,
4016        ];
4017
4018        for error_type in error_types {
4019            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4020            let deserialized: ErrorType =
4021                serde_json::from_str(&json).expect("Failed to deserialize");
4022            assert_eq!(error_type, deserialized);
4023        }
4024    }
4025
4026    #[test]
4027    fn test_standards_labeled_anomaly() {
4028        // Test creating a labeled anomaly for a standards violation
4029        let anomaly = LabeledAnomaly::new(
4030            "STD001".to_string(),
4031            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4032            "CONTRACT-2024-001".to_string(),
4033            "Revenue".to_string(),
4034            "1000".to_string(),
4035            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4036        )
4037        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4038        .with_monetary_impact(dec!(500000))
4039        .with_metadata("standard", "ASC 606")
4040        .with_metadata("paragraph", "606-10-25-1")
4041        .with_metadata("contract_id", "C-2024-001")
4042        .with_related_entity("CONTRACT-2024-001")
4043        .with_related_entity("CUSTOMER-500");
4044
4045        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4046        assert!(anomaly.is_injected);
4047        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4048        assert_eq!(anomaly.related_entities.len(), 2);
4049        assert_eq!(
4050            anomaly.metadata.get("standard"),
4051            Some(&"ASC 606".to_string())
4052        );
4053    }
4054
4055    // ==========================================================================
4056    // Multi-Dimensional Labeling Tests
4057    // ==========================================================================
4058
4059    #[test]
4060    fn test_severity_level() {
4061        assert_eq!(SeverityLevel::Low.numeric(), 1);
4062        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4063
4064        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4065        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4066
4067        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4068        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4069
4070        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4071    }
4072
4073    #[test]
4074    fn test_anomaly_severity() {
4075        let severity =
4076            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4077
4078        assert_eq!(severity.level, SeverityLevel::High);
4079        assert!(severity.is_material);
4080        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4081
4082        // Not material
4083        let low_severity =
4084            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4085        assert!(!low_severity.is_material);
4086    }
4087
4088    #[test]
4089    fn test_detection_difficulty() {
4090        assert!(
4091            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4092        );
4093        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4094
4095        assert_eq!(
4096            AnomalyDetectionDifficulty::from_score(0.05),
4097            AnomalyDetectionDifficulty::Trivial
4098        );
4099        assert_eq!(
4100            AnomalyDetectionDifficulty::from_score(0.90),
4101            AnomalyDetectionDifficulty::Expert
4102        );
4103
4104        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4105    }
4106
4107    #[test]
4108    fn test_ground_truth_certainty() {
4109        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4110        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4111        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4112    }
4113
4114    #[test]
4115    fn test_detection_method() {
4116        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4117        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4118    }
4119
4120    #[test]
4121    fn test_extended_anomaly_label() {
4122        let base = LabeledAnomaly::new(
4123            "ANO001".to_string(),
4124            AnomalyType::Fraud(FraudType::FictitiousVendor),
4125            "JE001".to_string(),
4126            "JE".to_string(),
4127            "1000".to_string(),
4128            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4129        )
4130        .with_monetary_impact(dec!(100000));
4131
4132        let extended = ExtendedAnomalyLabel::from_base(base)
4133            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4134            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4135            .with_method(DetectionMethod::GraphBased)
4136            .with_method(DetectionMethod::ForensicAudit)
4137            .with_indicator("New vendor with no history")
4138            .with_indicator("Large first transaction")
4139            .with_certainty(GroundTruthCertainty::Definite)
4140            .with_entity("V001")
4141            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4142            .with_scheme("SCHEME001", 2);
4143
4144        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4145        assert_eq!(
4146            extended.detection_difficulty,
4147            AnomalyDetectionDifficulty::Hard
4148        );
4149        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4150        assert_eq!(extended.recommended_methods.len(), 3);
4151        assert_eq!(extended.key_indicators.len(), 2);
4152        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4153        assert_eq!(extended.scheme_stage, Some(2));
4154    }
4155
4156    #[test]
4157    fn test_extended_anomaly_label_features() {
4158        let base = LabeledAnomaly::new(
4159            "ANO001".to_string(),
4160            AnomalyType::Fraud(FraudType::SelfApproval),
4161            "JE001".to_string(),
4162            "JE".to_string(),
4163            "1000".to_string(),
4164            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4165        );
4166
4167        let extended =
4168            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4169
4170        let features = extended.to_features();
4171        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4172        assert_eq!(features.len(), 30);
4173
4174        // Check difficulty score is in features
4175        let difficulty_idx = 18; // Position of difficulty_score
4176        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4177    }
4178
4179    #[test]
4180    fn test_extended_label_near_miss() {
4181        let base = LabeledAnomaly::new(
4182            "ANO001".to_string(),
4183            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4184            "JE001".to_string(),
4185            "JE".to_string(),
4186            "1000".to_string(),
4187            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4188        );
4189
4190        let extended = ExtendedAnomalyLabel::from_base(base)
4191            .as_near_miss("Year-end bonus payment, legitimately high");
4192
4193        assert!(extended.is_near_miss);
4194        assert!(extended.near_miss_explanation.is_some());
4195    }
4196
4197    #[test]
4198    fn test_scheme_type() {
4199        assert_eq!(
4200            SchemeType::GradualEmbezzlement.name(),
4201            "gradual_embezzlement"
4202        );
4203        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4204        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4205    }
4206
4207    #[test]
4208    fn test_concealment_technique() {
4209        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4210        assert!(
4211            ConcealmentTechnique::Collusion.difficulty_bonus()
4212                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4213        );
4214    }
4215
4216    #[test]
4217    fn test_near_miss_label() {
4218        let near_miss = NearMissLabel::new(
4219            "JE001",
4220            NearMissPattern::ThresholdProximity {
4221                threshold: dec!(10000),
4222                proximity: 0.95,
4223            },
4224            0.7,
4225            FalsePositiveTrigger::AmountNearThreshold,
4226            "Transaction is 95% of threshold but business justified",
4227        );
4228
4229        assert_eq!(near_miss.document_id, "JE001");
4230        assert_eq!(near_miss.suspicion_score, 0.7);
4231        assert_eq!(
4232            near_miss.false_positive_trigger,
4233            FalsePositiveTrigger::AmountNearThreshold
4234        );
4235    }
4236
4237    #[test]
4238    fn test_legitimate_pattern_type() {
4239        assert_eq!(
4240            LegitimatePatternType::YearEndBonus.description(),
4241            "Year-end bonus payment"
4242        );
4243        assert_eq!(
4244            LegitimatePatternType::InsuranceClaim.description(),
4245            "Insurance claim reimbursement"
4246        );
4247    }
4248
4249    #[test]
4250    fn test_severity_detection_difficulty_serialization() {
4251        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4252        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4253        let deserialized: AnomalySeverity =
4254            serde_json::from_str(&json).expect("Failed to deserialize");
4255        assert_eq!(severity.level, deserialized.level);
4256
4257        let difficulty = AnomalyDetectionDifficulty::Hard;
4258        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4259        let deserialized: AnomalyDetectionDifficulty =
4260            serde_json::from_str(&json).expect("Failed to deserialize");
4261        assert_eq!(difficulty, deserialized);
4262    }
4263
4264    // ========================================
4265    // ACFE Taxonomy Tests
4266    // ========================================
4267
4268    #[test]
4269    fn test_acfe_fraud_category() {
4270        let asset = AcfeFraudCategory::AssetMisappropriation;
4271        assert_eq!(asset.name(), "asset_misappropriation");
4272        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4273        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4274        assert_eq!(asset.typical_detection_months(), 12);
4275
4276        let corruption = AcfeFraudCategory::Corruption;
4277        assert_eq!(corruption.name(), "corruption");
4278        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4279
4280        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4281        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4282        assert_eq!(fs_fraud.typical_detection_months(), 24);
4283    }
4284
4285    #[test]
4286    fn test_cash_fraud_scheme() {
4287        let shell = CashFraudScheme::ShellCompany;
4288        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4289        assert_eq!(shell.subcategory(), "billing_schemes");
4290        assert_eq!(shell.severity(), 5);
4291        assert_eq!(
4292            shell.detection_difficulty(),
4293            AnomalyDetectionDifficulty::Hard
4294        );
4295
4296        let ghost = CashFraudScheme::GhostEmployee;
4297        assert_eq!(ghost.subcategory(), "payroll_schemes");
4298        assert_eq!(ghost.severity(), 5);
4299
4300        // Test all variants exist
4301        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4302    }
4303
4304    #[test]
4305    fn test_asset_fraud_scheme() {
4306        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4307        assert_eq!(
4308            ip_theft.category(),
4309            AcfeFraudCategory::AssetMisappropriation
4310        );
4311        assert_eq!(ip_theft.subcategory(), "other_assets");
4312        assert_eq!(ip_theft.severity(), 5);
4313
4314        let inv_theft = AssetFraudScheme::InventoryTheft;
4315        assert_eq!(inv_theft.subcategory(), "inventory");
4316        assert_eq!(inv_theft.severity(), 4);
4317    }
4318
4319    #[test]
4320    fn test_corruption_scheme() {
4321        let kickback = CorruptionScheme::InvoiceKickback;
4322        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4323        assert_eq!(kickback.subcategory(), "bribery");
4324        assert_eq!(kickback.severity(), 5);
4325        assert_eq!(
4326            kickback.detection_difficulty(),
4327            AnomalyDetectionDifficulty::Expert
4328        );
4329
4330        let bid_rigging = CorruptionScheme::BidRigging;
4331        assert_eq!(bid_rigging.subcategory(), "bribery");
4332        assert_eq!(
4333            bid_rigging.detection_difficulty(),
4334            AnomalyDetectionDifficulty::Hard
4335        );
4336
4337        let purchasing = CorruptionScheme::PurchasingConflict;
4338        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4339
4340        // Test all variants exist
4341        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4342    }
4343
4344    #[test]
4345    fn test_financial_statement_scheme() {
4346        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4347        assert_eq!(
4348            fictitious.category(),
4349            AcfeFraudCategory::FinancialStatementFraud
4350        );
4351        assert_eq!(fictitious.subcategory(), "overstatement");
4352        assert_eq!(fictitious.severity(), 5);
4353        assert_eq!(
4354            fictitious.detection_difficulty(),
4355            AnomalyDetectionDifficulty::Expert
4356        );
4357
4358        let understated = FinancialStatementScheme::UnderstatedRevenues;
4359        assert_eq!(understated.subcategory(), "understatement");
4360
4361        // Test all variants exist
4362        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4363    }
4364
4365    #[test]
4366    fn test_acfe_scheme_unified() {
4367        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4368        assert_eq!(
4369            cash_scheme.category(),
4370            AcfeFraudCategory::AssetMisappropriation
4371        );
4372        assert_eq!(cash_scheme.severity(), 5);
4373
4374        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4375        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4376
4377        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4378        assert_eq!(
4379            fs_scheme.category(),
4380            AcfeFraudCategory::FinancialStatementFraud
4381        );
4382    }
4383
4384    #[test]
4385    fn test_acfe_detection_method() {
4386        let tip = AcfeDetectionMethod::Tip;
4387        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4388
4389        let internal_audit = AcfeDetectionMethod::InternalAudit;
4390        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4391
4392        let external_audit = AcfeDetectionMethod::ExternalAudit;
4393        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4394
4395        // Test all variants exist
4396        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4397    }
4398
4399    #[test]
4400    fn test_perpetrator_department() {
4401        let accounting = PerpetratorDepartment::Accounting;
4402        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4403        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4404
4405        let executive = PerpetratorDepartment::Executive;
4406        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4407    }
4408
4409    #[test]
4410    fn test_perpetrator_level() {
4411        let employee = PerpetratorLevel::Employee;
4412        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4413        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4414
4415        let exec = PerpetratorLevel::OwnerExecutive;
4416        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4417    }
4418
4419    #[test]
4420    fn test_acfe_calibration() {
4421        let cal = AcfeCalibration::default();
4422        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4423        assert_eq!(cal.median_duration_months, 12);
4424        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4425        assert!(cal.validate().is_ok());
4426
4427        // Test custom calibration
4428        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4429        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4430        assert_eq!(custom_cal.median_duration_months, 18);
4431
4432        // Test validation failure
4433        let bad_cal = AcfeCalibration {
4434            collusion_rate: 1.5,
4435            ..Default::default()
4436        };
4437        assert!(bad_cal.validate().is_err());
4438    }
4439
4440    #[test]
4441    fn test_fraud_triangle() {
4442        let triangle = FraudTriangle::new(
4443            PressureType::FinancialTargets,
4444            vec![
4445                OpportunityFactor::WeakInternalControls,
4446                OpportunityFactor::ManagementOverride,
4447            ],
4448            Rationalization::ForTheCompanyGood,
4449        );
4450
4451        // Risk score should be between 0 and 1
4452        let risk = triangle.risk_score();
4453        assert!((0.0..=1.0).contains(&risk));
4454        // Should be relatively high given the components
4455        assert!(risk > 0.5);
4456    }
4457
4458    #[test]
4459    fn test_pressure_types() {
4460        let financial = PressureType::FinancialTargets;
4461        assert!(financial.risk_weight() > 0.5);
4462
4463        let gambling = PressureType::GamblingAddiction;
4464        assert_eq!(gambling.risk_weight(), 0.90);
4465    }
4466
4467    #[test]
4468    fn test_opportunity_factors() {
4469        let override_factor = OpportunityFactor::ManagementOverride;
4470        assert_eq!(override_factor.risk_weight(), 0.90);
4471
4472        let weak_controls = OpportunityFactor::WeakInternalControls;
4473        assert!(weak_controls.risk_weight() > 0.8);
4474    }
4475
4476    #[test]
4477    fn test_rationalizations() {
4478        let entitlement = Rationalization::Entitlement;
4479        assert!(entitlement.risk_weight() > 0.8);
4480
4481        let borrowing = Rationalization::TemporaryBorrowing;
4482        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4483    }
4484
4485    #[test]
4486    fn test_acfe_scheme_serialization() {
4487        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4488        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4489        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4490        assert_eq!(scheme, deserialized);
4491
4492        let calibration = AcfeCalibration::default();
4493        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4494        let deserialized: AcfeCalibration =
4495            serde_json::from_str(&json).expect("Failed to deserialize");
4496        assert_eq!(calibration.median_loss, deserialized.median_loss);
4497    }
4498}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs