datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// Causal reason explaining why an anomaly was injected.
///
/// This enables provenance tracking for understanding the "why" behind each anomaly.
/// Every variant carries the parameters that identify the rule or grouping that
/// triggered the injection.
///
/// Only `PartialEq` is derived (no `Eq`/`Hash`) because `RandomRate` holds an `f64`.
/// No `#[serde(...)]` attributes are applied to this enum, so serde's default enum
/// representation is used when it is serialized or deserialized.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnomalyCausalReason {
    /// Injected due to random rate selection.
    RandomRate {
        /// Base rate used for selection.
        // NOTE(review): presumably a probability in [0.0, 1.0]; nothing here enforces it.
        base_rate: f64,
    },
    /// Injected due to temporal pattern matching.
    TemporalPattern {
        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
        pattern_name: String,
    },
    /// Injected based on entity targeting rules.
    EntityTargeting {
        /// Type of entity targeted (e.g., "vendor", "user", "account").
        target_type: String,
        /// ID of the targeted entity.
        target_id: String,
    },
    /// Part of an anomaly cluster.
    ClusterMembership {
        /// ID of the cluster this anomaly belongs to.
        cluster_id: String,
    },
    /// Part of a multi-step scenario.
    ScenarioStep {
        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
        scenario_type: String,
        /// Step number within the scenario.
        // NOTE(review): whether steps are 0- or 1-based is not visible here.
        step_number: u32,
    },
    /// Injected based on data quality profile.
    DataQualityProfile {
        /// Profile name (e.g., "noisy", "legacy", "clean").
        profile: String,
    },
    /// Injected for ML training balance.
    MLTrainingBalance {
        /// Target class being balanced.
        target_class: String,
    },
}
60
/// Structured injection strategy with captured parameters.
///
/// Unlike the string-based `injection_strategy` field on `LabeledAnomaly`, this enum
/// captures the exact parameters used during injection for full reproducibility.
///
/// Only `PartialEq` is derived (no `Eq`/`Hash`) because `AmountManipulation` holds an
/// `f64`. No `#[serde(...)]` attributes are applied, so serde's default enum
/// representation is used.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InjectionStrategy {
    /// Amount was manipulated by a factor.
    AmountManipulation {
        /// Original amount before manipulation.
        original: Decimal,
        /// Multiplication factor applied.
        factor: f64,
    },
    /// Amount adjusted to avoid a threshold.
    ThresholdAvoidance {
        /// Threshold being avoided.
        threshold: Decimal,
        /// Final amount after adjustment.
        adjusted_amount: Decimal,
    },
    /// Date was backdated or forward-dated.
    DateShift {
        /// Number of days shifted (negative = backdated).
        days_shifted: i32,
        /// Original date before shift.
        original_date: NaiveDate,
    },
    /// User approved their own transaction.
    SelfApproval {
        /// User who created and approved.
        user_id: String,
    },
    /// Segregation of duties violation.
    SoDViolation {
        /// First duty involved.
        duty1: String,
        /// Second duty involved.
        duty2: String,
        /// User who performed both duties.
        violating_user: String,
    },
    /// Exact duplicate of another document.
    ExactDuplicate {
        /// ID of the original document.
        original_doc_id: String,
    },
    /// Near-duplicate with small variations.
    NearDuplicate {
        /// ID of the original document.
        original_doc_id: String,
        /// Names of the fields that were varied relative to the original.
        varied_fields: Vec<String>,
    },
    /// Circular flow of funds/goods.
    CircularFlow {
        /// Chain of entities involved.
        // NOTE(review): presumably ordered along the flow; confirm against the injector.
        entity_chain: Vec<String>,
    },
    /// Split transaction to avoid threshold.
    SplitTransaction {
        /// Original total amount.
        original_amount: Decimal,
        /// Number of splits.
        split_count: u32,
        /// IDs of the split documents.
        // NOTE(review): nothing here ties `split_doc_ids.len()` to `split_count`.
        split_doc_ids: Vec<String>,
    },
    /// Round number manipulation.
    RoundNumbering {
        /// Original precise amount.
        original_amount: Decimal,
        /// Rounded amount.
        rounded_amount: Decimal,
    },
    /// Timing manipulation (weekend, after-hours, etc.).
    TimingManipulation {
        /// Type of timing issue.
        timing_type: String,
        /// Original timestamp, if one was recorded.
        original_time: Option<NaiveDateTime>,
    },
    /// Account misclassification.
    AccountMisclassification {
        /// Correct account.
        correct_account: String,
        /// Incorrect account used.
        incorrect_account: String,
    },
    /// Missing required field.
    MissingField {
        /// Name of the missing field.
        field_name: String,
    },
    /// Custom injection strategy.
    Custom {
        /// Strategy name.
        name: String,
        /// Additional free-form parameters as string key/value pairs.
        parameters: HashMap<String, String>,
    },
}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
/// Primary anomaly classification.
///
/// Each non-custom variant wraps a more specific sub-type enum; `Custom` carries a
/// free-form name instead. The type is `Clone` but not `Copy` because `Custom`
/// owns a `String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyType {
    /// Fraudulent activity.
    Fraud(FraudType),
    /// Data entry or processing error.
    Error(ErrorType),
    /// Process or control issue.
    ProcessIssue(ProcessIssueType),
    /// Statistical anomaly.
    Statistical(StatisticalAnomalyType),
    /// Relational/graph anomaly.
    Relational(RelationalAnomalyType),
    /// Custom anomaly type, identified by a caller-supplied name.
    Custom(String),
}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
/// Fraud types for detection training.
///
/// A fieldless, `Copy` enum grouped by business area (see the section comments).
/// Several variants are documented aliases of one another (e.g. `Kickback` /
/// `KickbackScheme`); they are nevertheless distinct values for equality, hashing,
/// and serialization.
///
/// `FraudType::severity` maps each variant to a 1-5 severity; variants it does not
/// list explicitly fall back to 4.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FraudType {
    // Journal Entry Fraud
    /// Fictitious journal entry with no business purpose.
    FictitiousEntry,
    /// Fictitious transaction (alias for `FictitiousEntry`).
    FictitiousTransaction,
    /// Round-dollar amounts suggesting manual manipulation.
    RoundDollarManipulation,
    /// Entry posted just below approval threshold.
    JustBelowThreshold,
    /// Revenue recognition manipulation.
    RevenueManipulation,
    /// Expense capitalization fraud.
    ImproperCapitalization,
    /// Improperly capitalizing expenses as assets.
    ExpenseCapitalization,
    /// Cookie jar reserves manipulation.
    ReserveManipulation,
    /// Round-tripping funds through suspense/clearing accounts.
    SuspenseAccountAbuse,
    /// Splitting transactions to stay below approval thresholds.
    SplitTransaction,
    /// Unusual timing (weekend, holiday, after-hours postings).
    TimingAnomaly,
    /// Posting to unauthorized accounts.
    UnauthorizedAccess,

    // Approval Fraud
    /// User approving their own request.
    SelfApproval,
    /// Approval beyond authorized limit.
    ExceededApprovalLimit,
    /// Segregation of duties violation.
    SegregationOfDutiesViolation,
    /// Approval by unauthorized user.
    UnauthorizedApproval,
    /// Collusion between approver and requester.
    CollusiveApproval,

    // Vendor/Payment Fraud
    /// Fictitious vendor.
    FictitiousVendor,
    /// Duplicate payment to vendor.
    DuplicatePayment,
    /// Payment to shell company.
    ShellCompanyPayment,
    /// Kickback scheme.
    Kickback,
    /// Kickback scheme (alias for `Kickback`).
    KickbackScheme,
    /// Invoice manipulation.
    InvoiceManipulation,

    // Asset Fraud
    /// Misappropriation of assets.
    AssetMisappropriation,
    /// Inventory theft.
    InventoryTheft,
    /// Ghost employee.
    GhostEmployee,

    // Financial Statement Fraud
    /// Premature revenue recognition.
    PrematureRevenue,
    /// Understated liabilities.
    UnderstatedLiabilities,
    /// Overstated assets.
    OverstatedAssets,
    /// Channel stuffing.
    ChannelStuffing,

    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
    /// Improper revenue recognition timing (ASC 606/IFRS 15).
    ImproperRevenueRecognition,
    /// Multiple performance obligations not properly separated.
    ImproperPoAllocation,
    /// Variable consideration not properly estimated.
    VariableConsiderationManipulation,
    /// Contract modifications not properly accounted for.
    ContractModificationMisstatement,

    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
    /// Lease classification manipulation (operating vs finance).
    LeaseClassificationManipulation,
    /// Off-balance sheet lease fraud.
    OffBalanceSheetLease,
    /// Lease liability understatement.
    LeaseLiabilityUnderstatement,
    /// Right-of-use (ROU) asset misstatement.
    RouAssetMisstatement,

    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
    /// Fair value hierarchy misclassification.
    FairValueHierarchyManipulation,
    /// Level 3 input manipulation.
    Level3InputManipulation,
    /// Valuation technique manipulation.
    ValuationTechniqueManipulation,

    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
    /// Delayed impairment recognition.
    DelayedImpairment,
    /// Improperly avoiding impairment testing.
    ImpairmentTestAvoidance,
    /// Cash flow projection manipulation for impairment.
    CashFlowProjectionManipulation,
    /// Improper impairment reversal (IFRS only).
    ImproperImpairmentReversal,

    // Sourcing/Procurement Fraud (S2C)
    /// Bid rigging or collusion among bidders.
    BidRigging,
    /// Contracts with phantom/shell vendors.
    PhantomVendorContract,
    /// Splitting contracts to avoid approval thresholds.
    SplitContractThreshold,
    /// Conflict of interest in sourcing decisions.
    ConflictOfInterestSourcing,

    // HR/Payroll Fraud (H2R)
    /// Ghost employee on payroll.
    GhostEmployeePayroll,
    /// Payroll inflation/unauthorized raises.
    PayrollInflation,
    /// Duplicate expense report submission.
    DuplicateExpenseReport,
    /// Fictitious expense claims.
    FictitiousExpense,
    /// Splitting expenses to avoid approval threshold.
    SplitExpenseToAvoidApproval,

    // O2C Fraud
    /// Revenue timing manipulation via quotes.
    RevenueTimingManipulation,
    /// Overriding quote prices without authorization.
    QuotePriceOverride,
}
438
439impl FraudType {
440    /// Returns severity level (1-5).
441    pub fn severity(&self) -> u8 {
442        match self {
443            FraudType::RoundDollarManipulation => 2,
444            FraudType::JustBelowThreshold => 3,
445            FraudType::SelfApproval => 3,
446            FraudType::ExceededApprovalLimit => 3,
447            FraudType::DuplicatePayment => 3,
448            FraudType::FictitiousEntry => 4,
449            FraudType::RevenueManipulation => 5,
450            FraudType::FictitiousVendor => 5,
451            FraudType::ShellCompanyPayment => 5,
452            FraudType::AssetMisappropriation => 5,
453            FraudType::SegregationOfDutiesViolation => 4,
454            FraudType::CollusiveApproval => 5,
455            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
456            FraudType::ImproperRevenueRecognition => 5,
457            FraudType::ImproperPoAllocation => 4,
458            FraudType::VariableConsiderationManipulation => 4,
459            FraudType::ContractModificationMisstatement => 3,
460            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
461            FraudType::LeaseClassificationManipulation => 4,
462            FraudType::OffBalanceSheetLease => 5,
463            FraudType::LeaseLiabilityUnderstatement => 4,
464            FraudType::RouAssetMisstatement => 3,
465            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
466            FraudType::FairValueHierarchyManipulation => 4,
467            FraudType::Level3InputManipulation => 5,
468            FraudType::ValuationTechniqueManipulation => 4,
469            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
470            FraudType::DelayedImpairment => 4,
471            FraudType::ImpairmentTestAvoidance => 4,
472            FraudType::CashFlowProjectionManipulation => 5,
473            FraudType::ImproperImpairmentReversal => 3,
474            _ => 4,
475        }
476    }
477}
478
/// Error types for error detection.
///
/// A fieldless, `Copy` enum grouped by the kind of mistake (see the section
/// comments). `ErrorType::severity` maps each variant to a 1-5 severity; variants
/// it does not list explicitly fall back to 3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorType {
    // Data Entry Errors
    /// Duplicate document entry.
    DuplicateEntry,
    /// Reversed debit/credit amounts.
    ReversedAmount,
    /// Transposed digits in amount.
    TransposedDigits,
    /// Wrong decimal placement.
    DecimalError,
    /// Missing required field.
    MissingField,
    /// Invalid account code.
    InvalidAccount,

    // Timing Errors
    /// Posted to wrong period.
    WrongPeriod,
    /// Backdated entry.
    BackdatedEntry,
    /// Future-dated entry.
    FutureDatedEntry,
    /// Cutoff error.
    CutoffError,

    // Classification Errors
    /// Wrong account classification.
    MisclassifiedAccount,
    /// Wrong cost center.
    WrongCostCenter,
    /// Wrong company code.
    WrongCompanyCode,

    // Calculation Errors
    /// Unbalanced journal entry.
    UnbalancedEntry,
    /// Rounding error.
    RoundingError,
    /// Currency conversion error.
    CurrencyError,
    /// Tax calculation error.
    TaxCalculationError,

    // Accounting Standards Errors (Non-Fraudulent)
    /// Wrong revenue recognition timing (honest mistake).
    RevenueTimingError,
    /// Performance obligation allocation error.
    PoAllocationError,
    /// Lease classification error (operating vs finance).
    LeaseClassificationError,
    /// Lease calculation error (PV, amortization).
    LeaseCalculationError,
    /// Fair value measurement error.
    FairValueError,
    /// Impairment calculation error.
    ImpairmentCalculationError,
    /// Discount rate error.
    DiscountRateError,
    /// Framework application error (IFRS vs GAAP).
    FrameworkApplicationError,
}
542
543impl ErrorType {
544    /// Returns severity level (1-5).
545    pub fn severity(&self) -> u8 {
546        match self {
547            ErrorType::RoundingError => 1,
548            ErrorType::MissingField => 2,
549            ErrorType::TransposedDigits => 2,
550            ErrorType::DecimalError => 3,
551            ErrorType::DuplicateEntry => 3,
552            ErrorType::ReversedAmount => 3,
553            ErrorType::WrongPeriod => 4,
554            ErrorType::UnbalancedEntry => 5,
555            ErrorType::CurrencyError => 4,
556            // Accounting Standards Errors
557            ErrorType::RevenueTimingError => 4,
558            ErrorType::PoAllocationError => 3,
559            ErrorType::LeaseClassificationError => 3,
560            ErrorType::LeaseCalculationError => 3,
561            ErrorType::FairValueError => 4,
562            ErrorType::ImpairmentCalculationError => 4,
563            ErrorType::DiscountRateError => 3,
564            ErrorType::FrameworkApplicationError => 4,
565            _ => 3,
566        }
567    }
568}
569
/// Process issue types.
///
/// A fieldless, `Copy` enum covering approval, timing, control, documentation,
/// sourcing, and order-to-cash process problems (see the section comments).
/// `ProcessIssueType::severity` maps each variant to a 1-5 severity; variants it
/// does not list explicitly fall back to 3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProcessIssueType {
    // Approval Issues
    /// Approval skipped entirely.
    SkippedApproval,
    /// Late approval (after posting).
    LateApproval,
    /// Missing supporting documentation.
    MissingDocumentation,
    /// Incomplete approval chain.
    IncompleteApprovalChain,

    // Timing Issues
    /// Late posting.
    LatePosting,
    /// Posting outside business hours.
    AfterHoursPosting,
    /// Weekend/holiday posting.
    WeekendPosting,
    /// Rushed period-end posting.
    RushedPeriodEnd,

    // Control Issues
    /// Manual override of system control.
    ManualOverride,
    /// Unusual user access pattern.
    UnusualAccess,
    /// System bypass.
    SystemBypass,
    /// Batch processing anomaly.
    BatchAnomaly,

    // Documentation Issues
    /// Vague or missing description.
    VagueDescription,
    /// Changed after posting.
    PostFactoChange,
    /// Incomplete audit trail.
    IncompleteAuditTrail,

    // Sourcing/Procurement Issues (S2C)
    /// Purchasing outside of contracts (maverick spend).
    MaverickSpend,
    /// Purchasing against an expired contract.
    ExpiredContractPurchase,
    /// Overriding contracted price without authorization.
    ContractPriceOverride,
    /// Award given with only a single bid received.
    SingleBidAward,
    /// Bypassing supplier qualification requirements.
    QualificationBypass,

    // O2C Issues
    /// Converting an expired quote to a sales order.
    ExpiredQuoteConversion,
}
627
628impl ProcessIssueType {
629    /// Returns severity level (1-5).
630    pub fn severity(&self) -> u8 {
631        match self {
632            ProcessIssueType::VagueDescription => 1,
633            ProcessIssueType::LatePosting => 2,
634            ProcessIssueType::AfterHoursPosting => 2,
635            ProcessIssueType::WeekendPosting => 2,
636            ProcessIssueType::SkippedApproval => 4,
637            ProcessIssueType::ManualOverride => 4,
638            ProcessIssueType::SystemBypass => 5,
639            ProcessIssueType::IncompleteAuditTrail => 4,
640            _ => 3,
641        }
642    }
643}
644
/// Statistical anomaly types.
///
/// A fieldless, `Copy` enum covering amount, frequency, trend, and distribution
/// anomalies plus a few domain-specific patterns (see the section comments).
/// `StatisticalAnomalyType::severity` maps each variant to a 1-5 severity; variants
/// it does not list explicitly fall back to 3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatisticalAnomalyType {
    // Amount Anomalies
    /// Amount significantly above normal.
    UnusuallyHighAmount,
    /// Amount significantly below normal.
    UnusuallyLowAmount,
    /// Violates Benford's Law distribution.
    BenfordViolation,
    /// Exact duplicate amount (suspicious).
    ExactDuplicateAmount,
    /// Repeating pattern in amounts.
    RepeatingAmount,

    // Frequency Anomalies
    /// Unusual transaction frequency.
    UnusualFrequency,
    /// Burst of transactions.
    TransactionBurst,
    /// Unusual time of day.
    UnusualTiming,

    // Trend Anomalies
    /// Break in historical trend.
    TrendBreak,
    /// Sudden level shift.
    LevelShift,
    /// Seasonal pattern violation.
    SeasonalAnomaly,

    // Distribution Anomalies
    /// Outlier in distribution.
    StatisticalOutlier,
    /// Change in variance.
    VarianceChange,
    /// Distribution shift.
    DistributionShift,

    // Sourcing/Contract Anomalies
    /// Pattern of SLA breaches from a vendor.
    SlaBreachPattern,
    /// Contract with zero utilization.
    UnusedContract,

    // HR/Payroll Anomalies
    /// Anomalous overtime patterns.
    OvertimeAnomaly,
}
694
695impl StatisticalAnomalyType {
696    /// Returns severity level (1-5).
697    pub fn severity(&self) -> u8 {
698        match self {
699            StatisticalAnomalyType::UnusualTiming => 1,
700            StatisticalAnomalyType::UnusualFrequency => 2,
701            StatisticalAnomalyType::BenfordViolation => 2,
702            StatisticalAnomalyType::UnusuallyHighAmount => 3,
703            StatisticalAnomalyType::TrendBreak => 3,
704            StatisticalAnomalyType::TransactionBurst => 4,
705            StatisticalAnomalyType::ExactDuplicateAmount => 3,
706            _ => 3,
707        }
708    }
709}
710
/// Relational/graph anomaly types.
///
/// A fieldless, `Copy` enum covering transaction-pattern, network, relationship,
/// and intercompany anomalies (see the section comments).
/// `RelationalAnomalyType::severity` maps each variant to a 1-5 severity; variants
/// it does not list explicitly fall back to 3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RelationalAnomalyType {
    // Transaction Pattern Anomalies
    /// Circular transaction pattern.
    CircularTransaction,
    /// Unusual account combination.
    UnusualAccountPair,
    /// New trading partner.
    NewCounterparty,
    /// Dormant account suddenly active.
    DormantAccountActivity,

    // Network Anomalies
    /// Unusual network centrality.
    CentralityAnomaly,
    /// Isolated transaction cluster.
    IsolatedCluster,
    /// Bridge node anomaly.
    BridgeNodeAnomaly,
    /// Community structure change.
    CommunityAnomaly,

    // Relationship Anomalies
    /// Missing expected relationship.
    MissingRelationship,
    /// Unexpected relationship.
    UnexpectedRelationship,
    /// Relationship strength change.
    RelationshipStrengthChange,

    // Intercompany Anomalies
    /// Unmatched intercompany transaction.
    UnmatchedIntercompany,
    /// Circular intercompany flow.
    CircularIntercompany,
    /// Transfer pricing anomaly.
    TransferPricingAnomaly,
}
750
751impl RelationalAnomalyType {
752    /// Returns severity level (1-5).
753    pub fn severity(&self) -> u8 {
754        match self {
755            RelationalAnomalyType::NewCounterparty => 1,
756            RelationalAnomalyType::DormantAccountActivity => 2,
757            RelationalAnomalyType::UnusualAccountPair => 2,
758            RelationalAnomalyType::CircularTransaction => 4,
759            RelationalAnomalyType::CircularIntercompany => 4,
760            RelationalAnomalyType::TransferPricingAnomaly => 4,
761            RelationalAnomalyType::UnmatchedIntercompany => 3,
762            _ => 3,
763        }
764    }
765}
766
/// A labeled anomaly for supervised learning.
///
/// Combines the classification and location of an anomaly (type, document,
/// company, date) with scoring information and optional provenance data.
/// `LabeledAnomaly::new` fills in defaults for everything except the six
/// identifying fields; the `with_*` builder methods set the remaining ones.
///
/// The provenance fields are all marked `#[serde(default)]` and are omitted from
/// serialized output when unset (`None` or an empty vector), so records that lack
/// them still deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabeledAnomaly {
    /// Unique anomaly identifier.
    pub anomaly_id: String,
    /// Type of anomaly.
    pub anomaly_type: AnomalyType,
    /// Document or entity that contains the anomaly.
    pub document_id: String,
    /// Document type (JE, PO, Invoice, etc.).
    pub document_type: String,
    /// Company code.
    pub company_code: String,
    /// Date the anomaly occurred.
    pub anomaly_date: NaiveDate,
    /// Timestamp when detected/injected.
    ///
    /// (De)serialized through the crate's `serde_timestamp::naive` helper.
    /// `new` sets this from the local wall clock.
    #[serde(with = "crate::serde_timestamp::naive")]
    pub detection_timestamp: NaiveDateTime,
    /// Confidence score (0.0 - 1.0) for injected anomalies.
    ///
    /// `new` initializes this to 1.0; the range is not enforced by the type.
    pub confidence: f64,
    /// Severity (1-5).
    ///
    /// `new` initializes this from `anomaly_type.severity()`.
    pub severity: u8,
    /// Description of the anomaly.
    pub description: String,
    /// Related entities (user IDs, account codes, etc.).
    pub related_entities: Vec<String>,
    /// Monetary impact if applicable.
    pub monetary_impact: Option<Decimal>,
    /// Additional metadata as free-form string key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Whether this was injected (true) or naturally occurring (false).
    ///
    /// `new` initializes this to `true`.
    pub is_injected: bool,
    /// Injection strategy used (if injected) - legacy string field.
    pub injection_strategy: Option<String>,
    /// Cluster ID if part of an anomaly cluster.
    pub cluster_id: Option<String>,

    // ========================================
    // PROVENANCE TRACKING FIELDS (Phase 1.2)
    // ========================================
    /// Hash of the original document before modification.
    /// Enables tracking what the document looked like pre-injection.
    // NOTE(review): the hash algorithm and encoding are not specified here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_document_hash: Option<String>,

    /// Causal reason explaining why this anomaly was injected.
    /// Provides "why" tracking for each anomaly.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_reason: Option<AnomalyCausalReason>,

    /// Structured injection strategy with parameters.
    /// More detailed than the legacy string-based injection_strategy field.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured_strategy: Option<InjectionStrategy>,

    /// Parent anomaly ID if this was derived from another anomaly.
    /// Enables anomaly transformation chains.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_anomaly_id: Option<String>,

    /// Child anomaly IDs that were derived from this anomaly.
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub child_anomaly_ids: Vec<String>,

    /// Scenario ID if this anomaly is part of a multi-step scenario.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scenario_id: Option<String>,

    /// Generation run ID that produced this anomaly.
    /// Enables tracing anomalies back to their generation run.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,

    /// Seed used for RNG during generation.
    /// Enables reproducibility.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation_seed: Option<u64>,
}
845
846impl LabeledAnomaly {
847    /// Creates a new labeled anomaly.
848    pub fn new(
849        anomaly_id: String,
850        anomaly_type: AnomalyType,
851        document_id: String,
852        document_type: String,
853        company_code: String,
854        anomaly_date: NaiveDate,
855    ) -> Self {
856        let severity = anomaly_type.severity();
857        let description = format!(
858            "{} - {} in document {}",
859            anomaly_type.category(),
860            anomaly_type.type_name(),
861            document_id
862        );
863
864        Self {
865            anomaly_id,
866            anomaly_type,
867            document_id,
868            document_type,
869            company_code,
870            anomaly_date,
871            detection_timestamp: chrono::Local::now().naive_local(),
872            confidence: 1.0,
873            severity,
874            description,
875            related_entities: Vec::new(),
876            monetary_impact: None,
877            metadata: HashMap::new(),
878            is_injected: true,
879            injection_strategy: None,
880            cluster_id: None,
881            // Provenance fields
882            original_document_hash: None,
883            causal_reason: None,
884            structured_strategy: None,
885            parent_anomaly_id: None,
886            child_anomaly_ids: Vec::new(),
887            scenario_id: None,
888            run_id: None,
889            generation_seed: None,
890        }
891    }
892
893    /// Sets the description.
894    pub fn with_description(mut self, description: &str) -> Self {
895        self.description = description.to_string();
896        self
897    }
898
899    /// Sets the monetary impact.
900    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
901        self.monetary_impact = Some(impact);
902        self
903    }
904
905    /// Adds a related entity.
906    pub fn with_related_entity(mut self, entity: &str) -> Self {
907        self.related_entities.push(entity.to_string());
908        self
909    }
910
911    /// Adds metadata.
912    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
913        self.metadata.insert(key.to_string(), value.to_string());
914        self
915    }
916
917    /// Sets the injection strategy (legacy string).
918    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
919        self.injection_strategy = Some(strategy.to_string());
920        self
921    }
922
923    /// Sets the cluster ID.
924    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
925        self.cluster_id = Some(cluster_id.to_string());
926        self
927    }
928
929    // ========================================
930    // PROVENANCE BUILDER METHODS (Phase 1.2)
931    // ========================================
932
933    /// Sets the original document hash for provenance tracking.
934    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
935        self.original_document_hash = Some(hash.to_string());
936        self
937    }
938
939    /// Sets the causal reason for this anomaly.
940    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
941        self.causal_reason = Some(reason);
942        self
943    }
944
945    /// Sets the structured injection strategy.
946    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
947        // Also set the legacy string field for backward compatibility
948        self.injection_strategy = Some(strategy.strategy_type().to_string());
949        self.structured_strategy = Some(strategy);
950        self
951    }
952
953    /// Sets the parent anomaly ID (for anomaly derivation chains).
954    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
955        self.parent_anomaly_id = Some(parent_id.to_string());
956        self
957    }
958
959    /// Adds a child anomaly ID.
960    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
961        self.child_anomaly_ids.push(child_id.to_string());
962        self
963    }
964
965    /// Sets the scenario ID for multi-step scenario tracking.
966    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
967        self.scenario_id = Some(scenario_id.to_string());
968        self
969    }
970
971    /// Sets the generation run ID.
972    pub fn with_run_id(mut self, run_id: &str) -> Self {
973        self.run_id = Some(run_id.to_string());
974        self
975    }
976
977    /// Sets the generation seed for reproducibility.
978    pub fn with_generation_seed(mut self, seed: u64) -> Self {
979        self.generation_seed = Some(seed);
980        self
981    }
982
983    /// Sets multiple provenance fields at once for convenience.
984    pub fn with_provenance(
985        mut self,
986        run_id: Option<&str>,
987        seed: Option<u64>,
988        causal_reason: Option<AnomalyCausalReason>,
989    ) -> Self {
990        if let Some(id) = run_id {
991            self.run_id = Some(id.to_string());
992        }
993        self.generation_seed = seed;
994        self.causal_reason = causal_reason;
995        self
996    }
997
998    /// Converts to a feature vector for ML.
999    ///
1000    /// Returns a vector of 15 features:
1001    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1002    /// - 1 feature: Severity (normalized 0-1)
1003    /// - 1 feature: Confidence
1004    /// - 1 feature: Has monetary impact (0/1)
1005    /// - 1 feature: Monetary impact (log-scaled)
1006    /// - 1 feature: Is intentional (0/1)
1007    /// - 1 feature: Number of related entities
1008    /// - 1 feature: Is part of cluster (0/1)
1009    /// - 1 feature: Is part of scenario (0/1)
1010    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1011    pub fn to_features(&self) -> Vec<f64> {
1012        let mut features = Vec::new();
1013
1014        // Category one-hot encoding
1015        let categories = [
1016            "Fraud",
1017            "Error",
1018            "ProcessIssue",
1019            "Statistical",
1020            "Relational",
1021            "Custom",
1022        ];
1023        for cat in &categories {
1024            features.push(if self.anomaly_type.category() == *cat {
1025                1.0
1026            } else {
1027                0.0
1028            });
1029        }
1030
1031        // Severity (normalized)
1032        features.push(self.severity as f64 / 5.0);
1033
1034        // Confidence
1035        features.push(self.confidence);
1036
1037        // Has monetary impact
1038        features.push(if self.monetary_impact.is_some() {
1039            1.0
1040        } else {
1041            0.0
1042        });
1043
1044        // Monetary impact (log-scaled)
1045        if let Some(impact) = self.monetary_impact {
1046            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1047            features.push((impact_f64.abs() + 1.0).ln());
1048        } else {
1049            features.push(0.0);
1050        }
1051
1052        // Is intentional
1053        features.push(if self.anomaly_type.is_intentional() {
1054            1.0
1055        } else {
1056            0.0
1057        });
1058
1059        // Number of related entities
1060        features.push(self.related_entities.len() as f64);
1061
1062        // Is part of cluster
1063        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1064
1065        // Provenance features
1066        // Is part of scenario
1067        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1068
1069        // Has parent anomaly (indicates this is a derived anomaly)
1070        features.push(if self.parent_anomaly_id.is_some() {
1071            1.0
1072        } else {
1073            0.0
1074        });
1075
1076        features
1077    }
1078
1079    /// Returns the number of features in the feature vector.
1080    pub fn feature_count() -> usize {
1081        15 // 6 category + 9 other features
1082    }
1083
1084    /// Returns feature names for documentation/ML metadata.
1085    pub fn feature_names() -> Vec<&'static str> {
1086        vec![
1087            "category_fraud",
1088            "category_error",
1089            "category_process_issue",
1090            "category_statistical",
1091            "category_relational",
1092            "category_custom",
1093            "severity_normalized",
1094            "confidence",
1095            "has_monetary_impact",
1096            "monetary_impact_log",
1097            "is_intentional",
1098            "related_entity_count",
1099            "is_clustered",
1100            "is_scenario_part",
1101            "is_derived",
1102        ]
1103    }
1104}
1105
/// Summary of anomalies for reporting.
///
/// Aggregated counts and totals over a set of `LabeledAnomaly` values; see
/// `AnomalySummary::from_anomalies` for how each field is populated.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Total anomaly count.
    pub total_count: usize,
    /// Count by category, keyed by the string form of `anomaly_type.category()`.
    pub by_category: HashMap<String, usize>,
    /// Count by specific type, keyed by `anomaly_type.type_name()`.
    pub by_type: HashMap<String, usize>,
    /// Count by severity, keyed by the anomaly's raw `severity` value.
    pub by_severity: HashMap<u8, usize>,
    /// Count by company, keyed by `company_code`.
    pub by_company: HashMap<String, usize>,
    /// Total monetary impact (sum over anomalies that carry an impact).
    pub total_monetary_impact: Decimal,
    /// Date range as `(earliest, latest)` anomaly date; `None` when no anomalies were summarized.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster IDs seen.
    pub cluster_count: usize,
}
1126
1127impl AnomalySummary {
1128    /// Creates a summary from a list of anomalies.
1129    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1130        let mut summary = AnomalySummary {
1131            total_count: anomalies.len(),
1132            ..Default::default()
1133        };
1134
1135        let mut min_date: Option<NaiveDate> = None;
1136        let mut max_date: Option<NaiveDate> = None;
1137        let mut clusters = std::collections::HashSet::new();
1138
1139        for anomaly in anomalies {
1140            // By category
1141            *summary
1142                .by_category
1143                .entry(anomaly.anomaly_type.category().to_string())
1144                .or_insert(0) += 1;
1145
1146            // By type
1147            *summary
1148                .by_type
1149                .entry(anomaly.anomaly_type.type_name())
1150                .or_insert(0) += 1;
1151
1152            // By severity
1153            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1154
1155            // By company
1156            *summary
1157                .by_company
1158                .entry(anomaly.company_code.clone())
1159                .or_insert(0) += 1;
1160
1161            // Monetary impact
1162            if let Some(impact) = anomaly.monetary_impact {
1163                summary.total_monetary_impact += impact;
1164            }
1165
1166            // Date range
1167            match min_date {
1168                None => min_date = Some(anomaly.anomaly_date),
1169                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1170                _ => {}
1171            }
1172            match max_date {
1173                None => max_date = Some(anomaly.anomaly_date),
1174                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1175                _ => {}
1176            }
1177
1178            // Clusters
1179            if let Some(cluster_id) = &anomaly.cluster_id {
1180                clusters.insert(cluster_id.clone());
1181            }
1182        }
1183
1184        summary.date_range = min_date.zip(max_date);
1185        summary.cluster_count = clusters.len();
1186
1187        summary
1188    }
1189}
1190
1191// ============================================================================
1192// ENHANCED ANOMALY TAXONOMY (FR-003)
1193// ============================================================================
1194
/// High-level anomaly category for multi-class classification.
///
/// These categories provide a more granular classification than the base
/// AnomalyType enum, enabling better ML model training and audit reporting.
///
/// The declaration order of the variants matches the values returned by
/// `AnomalyCategory::ordinal()` (0 for `FictitiousVendor` through 14 for `Custom`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related anomalies
    /// Fictitious or shell vendor.
    FictitiousVendor,
    /// Kickback or collusion with vendor.
    VendorKickback,
    /// Related party vendor transactions.
    RelatedPartyVendor,

    // Transaction-related anomalies
    /// Duplicate payment or invoice.
    DuplicatePayment,
    /// Unauthorized transaction.
    UnauthorizedTransaction,
    /// Structured transactions to avoid thresholds.
    StructuredTransaction,

    // Pattern-based anomalies
    /// Circular flow of funds.
    CircularFlow,
    /// Behavioral anomaly (deviation from normal patterns).
    BehavioralAnomaly,
    /// Timing-based anomaly.
    TimingAnomaly,

    // Journal entry anomalies
    /// Manual journal entry anomaly.
    JournalAnomaly,
    /// Manual override of controls.
    ManualOverride,
    /// Missing approval in chain.
    MissingApproval,

    // Statistical anomalies
    /// Statistical outlier.
    StatisticalOutlier,
    /// Distribution anomaly (Benford, etc.).
    DistributionAnomaly,

    // Custom category
    /// User-defined category; the payload is the category's name.
    Custom(String),
}
1243
1244impl AnomalyCategory {
1245    /// Derives an AnomalyCategory from an AnomalyType.
1246    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1247        match anomaly_type {
1248            AnomalyType::Fraud(fraud_type) => match fraud_type {
1249                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1250                    AnomalyCategory::FictitiousVendor
1251                }
1252                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1253                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1254                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1255                    AnomalyCategory::StructuredTransaction
1256                }
1257                FraudType::SelfApproval
1258                | FraudType::UnauthorizedApproval
1259                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1260                FraudType::TimingAnomaly
1261                | FraudType::RoundDollarManipulation
1262                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1263                _ => AnomalyCategory::BehavioralAnomaly,
1264            },
1265            AnomalyType::Error(error_type) => match error_type {
1266                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1267                ErrorType::WrongPeriod
1268                | ErrorType::BackdatedEntry
1269                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1270                _ => AnomalyCategory::JournalAnomaly,
1271            },
1272            AnomalyType::ProcessIssue(process_type) => match process_type {
1273                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1274                    AnomalyCategory::MissingApproval
1275                }
1276                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1277                    AnomalyCategory::ManualOverride
1278                }
1279                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1280                    AnomalyCategory::TimingAnomaly
1281                }
1282                _ => AnomalyCategory::BehavioralAnomaly,
1283            },
1284            AnomalyType::Statistical(stat_type) => match stat_type {
1285                StatisticalAnomalyType::BenfordViolation
1286                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1287                _ => AnomalyCategory::StatisticalOutlier,
1288            },
1289            AnomalyType::Relational(rel_type) => match rel_type {
1290                RelationalAnomalyType::CircularTransaction
1291                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1292                _ => AnomalyCategory::BehavioralAnomaly,
1293            },
1294            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1295        }
1296    }
1297
1298    /// Returns the category name as a string.
1299    pub fn name(&self) -> &str {
1300        match self {
1301            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1302            AnomalyCategory::VendorKickback => "vendor_kickback",
1303            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1304            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1305            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1306            AnomalyCategory::StructuredTransaction => "structured_transaction",
1307            AnomalyCategory::CircularFlow => "circular_flow",
1308            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1309            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1310            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1311            AnomalyCategory::ManualOverride => "manual_override",
1312            AnomalyCategory::MissingApproval => "missing_approval",
1313            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1314            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1315            AnomalyCategory::Custom(s) => s.as_str(),
1316        }
1317    }
1318
1319    /// Returns the ordinal value for ML encoding.
1320    pub fn ordinal(&self) -> u8 {
1321        match self {
1322            AnomalyCategory::FictitiousVendor => 0,
1323            AnomalyCategory::VendorKickback => 1,
1324            AnomalyCategory::RelatedPartyVendor => 2,
1325            AnomalyCategory::DuplicatePayment => 3,
1326            AnomalyCategory::UnauthorizedTransaction => 4,
1327            AnomalyCategory::StructuredTransaction => 5,
1328            AnomalyCategory::CircularFlow => 6,
1329            AnomalyCategory::BehavioralAnomaly => 7,
1330            AnomalyCategory::TimingAnomaly => 8,
1331            AnomalyCategory::JournalAnomaly => 9,
1332            AnomalyCategory::ManualOverride => 10,
1333            AnomalyCategory::MissingApproval => 11,
1334            AnomalyCategory::StatisticalOutlier => 12,
1335            AnomalyCategory::DistributionAnomaly => 13,
1336            AnomalyCategory::Custom(_) => 14,
1337        }
1338    }
1339
1340    /// Returns the total number of categories (excluding Custom).
1341    pub fn category_count() -> usize {
1342        15 // 14 fixed categories + Custom
1343    }
1344}
1345
/// Type of contributing factor for anomaly confidence/severity calculation.
///
/// Each variant has a stable snake_case string form available through
/// `FactorType::name()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Amount deviation from expected value.
    AmountDeviation,
    /// Proximity to approval/reporting threshold.
    ThresholdProximity,
    /// Timing-related anomaly indicator.
    TimingAnomaly,
    /// Entity risk score contribution.
    EntityRisk,
    /// Pattern match confidence.
    PatternMatch,
    /// Frequency deviation from normal.
    FrequencyDeviation,
    /// Relationship-based anomaly indicator.
    RelationshipAnomaly,
    /// Control bypass indicator.
    ControlBypass,
    /// Benford's Law violation.
    BenfordViolation,
    /// Duplicate indicator.
    DuplicateIndicator,
    /// Approval chain issue.
    ApprovalChainIssue,
    /// Documentation gap.
    DocumentationGap,
    /// Custom factor type (carries no payload).
    Custom,
}
1376
1377impl FactorType {
1378    /// Returns the factor type name.
1379    pub fn name(&self) -> &'static str {
1380        match self {
1381            FactorType::AmountDeviation => "amount_deviation",
1382            FactorType::ThresholdProximity => "threshold_proximity",
1383            FactorType::TimingAnomaly => "timing_anomaly",
1384            FactorType::EntityRisk => "entity_risk",
1385            FactorType::PatternMatch => "pattern_match",
1386            FactorType::FrequencyDeviation => "frequency_deviation",
1387            FactorType::RelationshipAnomaly => "relationship_anomaly",
1388            FactorType::ControlBypass => "control_bypass",
1389            FactorType::BenfordViolation => "benford_violation",
1390            FactorType::DuplicateIndicator => "duplicate_indicator",
1391            FactorType::ApprovalChainIssue => "approval_chain_issue",
1392            FactorType::DocumentationGap => "documentation_gap",
1393            FactorType::Custom => "custom",
1394        }
1395    }
1396}
1397
/// Evidence supporting a contributing factor.
///
/// Attached to a `ContributingFactor` via `ContributingFactor::with_evidence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
    pub source: String,
    /// Raw evidence data as free-form string key/value pairs.
    pub data: HashMap<String, String>,
}
1406
/// A contributing factor to anomaly confidence/severity.
///
/// Compares an observed `value` against a `threshold`; see
/// `ContributingFactor::contribution` for how the weighted score is derived.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Type of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold or expected value.
    pub threshold: f64,
    /// Direction of comparison (true = value > threshold is anomalous;
    /// false = value < threshold is anomalous).
    pub direction_greater: bool,
    /// Weight of this factor in overall calculation (0.0 - 1.0).
    /// The range is a convention; `new` stores the value as given.
    pub weight: f64,
    /// Human-readable description.
    pub description: String,
    /// Optional supporting evidence.
    pub evidence: Option<FactorEvidence>,
}
1425
1426impl ContributingFactor {
1427    /// Creates a new contributing factor.
1428    pub fn new(
1429        factor_type: FactorType,
1430        value: f64,
1431        threshold: f64,
1432        direction_greater: bool,
1433        weight: f64,
1434        description: &str,
1435    ) -> Self {
1436        Self {
1437            factor_type,
1438            value,
1439            threshold,
1440            direction_greater,
1441            weight,
1442            description: description.to_string(),
1443            evidence: None,
1444        }
1445    }
1446
1447    /// Adds evidence to the factor.
1448    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1449        self.evidence = Some(FactorEvidence {
1450            source: source.to_string(),
1451            data,
1452        });
1453        self
1454    }
1455
1456    /// Calculates the factor's contribution to anomaly score.
1457    pub fn contribution(&self) -> f64 {
1458        let deviation = if self.direction_greater {
1459            (self.value - self.threshold).max(0.0)
1460        } else {
1461            (self.threshold - self.value).max(0.0)
1462        };
1463
1464        // Normalize by threshold to get relative deviation
1465        let relative_deviation = if self.threshold.abs() > 0.001 {
1466            deviation / self.threshold.abs()
1467        } else {
1468            deviation
1469        };
1470
1471        // Apply weight and cap at 1.0
1472        (relative_deviation * self.weight).min(1.0)
1473    }
1474}
1475
/// Enhanced anomaly label with dynamic confidence and severity.
///
/// Wraps a `LabeledAnomaly` and adds a finer-grained category plus scoring
/// details. Typically created with `EnhancedAnomalyLabel::from_base`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// Base labeled anomaly (backward compatible).
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Dynamically calculated confidence (0.0 - 1.0).
    pub enhanced_confidence: f64,
    /// Contextually calculated severity (0.0 - 1.0).
    pub enhanced_severity: f64,
    /// Factors contributing to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Secondary categories (for multi-label classification).
    /// `with_secondary_category` keeps this free of duplicates and of the primary category.
    pub secondary_categories: Vec<AnomalyCategory>,
}
1492
1493impl EnhancedAnomalyLabel {
1494    /// Creates an enhanced label from a base labeled anomaly.
1495    pub fn from_base(base: LabeledAnomaly) -> Self {
1496        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1497        let enhanced_confidence = base.confidence;
1498        let enhanced_severity = base.severity as f64 / 5.0;
1499
1500        Self {
1501            base,
1502            category,
1503            enhanced_confidence,
1504            enhanced_severity,
1505            contributing_factors: Vec::new(),
1506            secondary_categories: Vec::new(),
1507        }
1508    }
1509
1510    /// Sets the enhanced confidence.
1511    pub fn with_confidence(mut self, confidence: f64) -> Self {
1512        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1513        self
1514    }
1515
1516    /// Sets the enhanced severity.
1517    pub fn with_severity(mut self, severity: f64) -> Self {
1518        self.enhanced_severity = severity.clamp(0.0, 1.0);
1519        self
1520    }
1521
1522    /// Adds a contributing factor.
1523    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1524        self.contributing_factors.push(factor);
1525        self
1526    }
1527
1528    /// Adds a secondary category.
1529    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1530        if !self.secondary_categories.contains(&category) && category != self.category {
1531            self.secondary_categories.push(category);
1532        }
1533        self
1534    }
1535
1536    /// Converts to an extended feature vector.
1537    ///
1538    /// Returns base features (15) + enhanced features (10) = 25 features.
1539    pub fn to_features(&self) -> Vec<f64> {
1540        let mut features = self.base.to_features();
1541
1542        // Enhanced features
1543        features.push(self.enhanced_confidence);
1544        features.push(self.enhanced_severity);
1545        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1546        features.push(self.secondary_categories.len() as f64);
1547        features.push(self.contributing_factors.len() as f64);
1548
1549        // Max factor weight
1550        let max_weight = self
1551            .contributing_factors
1552            .iter()
1553            .map(|f| f.weight)
1554            .fold(0.0, f64::max);
1555        features.push(max_weight);
1556
1557        // Factor type indicators (binary flags for key factor types)
1558        let has_control_bypass = self
1559            .contributing_factors
1560            .iter()
1561            .any(|f| f.factor_type == FactorType::ControlBypass);
1562        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1563
1564        let has_amount_deviation = self
1565            .contributing_factors
1566            .iter()
1567            .any(|f| f.factor_type == FactorType::AmountDeviation);
1568        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1569
1570        let has_timing = self
1571            .contributing_factors
1572            .iter()
1573            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1574        features.push(if has_timing { 1.0 } else { 0.0 });
1575
1576        let has_pattern_match = self
1577            .contributing_factors
1578            .iter()
1579            .any(|f| f.factor_type == FactorType::PatternMatch);
1580        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1581
1582        features
1583    }
1584
1585    /// Returns the number of features in the enhanced feature vector.
1586    pub fn feature_count() -> usize {
1587        25 // 15 base + 10 enhanced
1588    }
1589
1590    /// Returns feature names for the enhanced feature vector.
1591    pub fn feature_names() -> Vec<&'static str> {
1592        let mut names = LabeledAnomaly::feature_names();
1593        names.extend(vec![
1594            "enhanced_confidence",
1595            "enhanced_severity",
1596            "category_ordinal",
1597            "secondary_category_count",
1598            "contributing_factor_count",
1599            "max_factor_weight",
1600            "has_control_bypass",
1601            "has_amount_deviation",
1602            "has_timing_factor",
1603            "has_pattern_match",
1604        ]);
1605        names
1606    }
1607}
1608
1609// ============================================================================
1610// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1611// ============================================================================
1612
/// Severity level classification for anomalies.
///
/// Four ordered levels; the derived `Default` is `Medium`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SeverityLevel {
    /// Minor issue, low impact.
    Low,
    /// Moderate issue, noticeable impact.
    #[default]
    Medium,
    /// Significant issue, substantial impact.
    High,
    /// Critical issue, severe impact requiring immediate attention.
    Critical,
}
1626
1627impl SeverityLevel {
1628    /// Returns the numeric value (1-4) for the severity level.
1629    pub fn numeric(&self) -> u8 {
1630        match self {
1631            SeverityLevel::Low => 1,
1632            SeverityLevel::Medium => 2,
1633            SeverityLevel::High => 3,
1634            SeverityLevel::Critical => 4,
1635        }
1636    }
1637
1638    /// Creates a severity level from a numeric value.
1639    pub fn from_numeric(value: u8) -> Self {
1640        match value {
1641            1 => SeverityLevel::Low,
1642            2 => SeverityLevel::Medium,
1643            3 => SeverityLevel::High,
1644            _ => SeverityLevel::Critical,
1645        }
1646    }
1647
1648    /// Creates a severity level from a normalized score (0.0-1.0).
1649    pub fn from_score(score: f64) -> Self {
1650        match score {
1651            s if s < 0.25 => SeverityLevel::Low,
1652            s if s < 0.50 => SeverityLevel::Medium,
1653            s if s < 0.75 => SeverityLevel::High,
1654            _ => SeverityLevel::Critical,
1655        }
1656    }
1657
1658    /// Returns a normalized score (0.0-1.0) for this severity level.
1659    pub fn to_score(&self) -> f64 {
1660        match self {
1661            SeverityLevel::Low => 0.125,
1662            SeverityLevel::Medium => 0.375,
1663            SeverityLevel::High => 0.625,
1664            SeverityLevel::Critical => 0.875,
1665        }
1666    }
1667}
1668
/// Structured severity scoring for anomalies.
///
/// Combines a discrete level, a continuous score and a financial impact, with
/// an optional materiality assessment set via `AnomalySeverity::with_materiality`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalySeverity {
    /// Severity level classification.
    pub level: SeverityLevel,
    /// Continuous severity score (0.0-1.0).
    pub score: f64,
    /// Absolute financial impact amount.
    pub financial_impact: Decimal,
    /// Whether this exceeds materiality threshold.
    /// Stays `false` until `with_materiality` is called.
    pub is_material: bool,
    /// Materiality threshold used for determination.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub materiality_threshold: Option<Decimal>,
}
1684
1685impl AnomalySeverity {
1686    /// Creates a new severity assessment.
1687    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1688        Self {
1689            level,
1690            score: level.to_score(),
1691            financial_impact,
1692            is_material: false,
1693            materiality_threshold: None,
1694        }
1695    }
1696
1697    /// Creates severity from a score, auto-determining level.
1698    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1699        Self {
1700            level: SeverityLevel::from_score(score),
1701            score: score.clamp(0.0, 1.0),
1702            financial_impact,
1703            is_material: false,
1704            materiality_threshold: None,
1705        }
1706    }
1707
1708    /// Sets the materiality assessment.
1709    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1710        self.materiality_threshold = Some(threshold);
1711        self.is_material = self.financial_impact.abs() >= threshold;
1712        self
1713    }
1714}
1715
1716impl Default for AnomalySeverity {
1717    fn default() -> Self {
1718        Self {
1719            level: SeverityLevel::Medium,
1720            score: 0.5,
1721            financial_impact: Decimal::ZERO,
1722            is_material: false,
1723            materiality_threshold: None,
1724        }
1725    }
1726}
1727
1728/// Detection difficulty classification for anomalies.
1729///
1730/// Categorizes how difficult an anomaly is to detect, which is useful
1731/// for ML model benchmarking and audit procedure selection.
1732///
1733/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
1734/// is used for drift event classification and has different variants.
1735#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1736pub enum AnomalyDetectionDifficulty {
1737    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
1738    Trivial,
1739    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
1740    Easy,
1741    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
1742    #[default]
1743    Moderate,
1744    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
1745    Hard,
1746    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
1747    Expert,
1748}
1749
1750impl AnomalyDetectionDifficulty {
1751    /// Returns the expected detection rate for this difficulty level.
1752    pub fn expected_detection_rate(&self) -> f64 {
1753        match self {
1754            AnomalyDetectionDifficulty::Trivial => 0.99,
1755            AnomalyDetectionDifficulty::Easy => 0.90,
1756            AnomalyDetectionDifficulty::Moderate => 0.70,
1757            AnomalyDetectionDifficulty::Hard => 0.40,
1758            AnomalyDetectionDifficulty::Expert => 0.15,
1759        }
1760    }
1761
1762    /// Returns a numeric difficulty score (0.0-1.0).
1763    pub fn difficulty_score(&self) -> f64 {
1764        match self {
1765            AnomalyDetectionDifficulty::Trivial => 0.05,
1766            AnomalyDetectionDifficulty::Easy => 0.25,
1767            AnomalyDetectionDifficulty::Moderate => 0.50,
1768            AnomalyDetectionDifficulty::Hard => 0.75,
1769            AnomalyDetectionDifficulty::Expert => 0.95,
1770        }
1771    }
1772
1773    /// Creates a difficulty level from a score (0.0-1.0).
1774    pub fn from_score(score: f64) -> Self {
1775        match score {
1776            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1777            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1778            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1779            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1780            _ => AnomalyDetectionDifficulty::Expert,
1781        }
1782    }
1783
1784    /// Returns the name of this difficulty level.
1785    pub fn name(&self) -> &'static str {
1786        match self {
1787            AnomalyDetectionDifficulty::Trivial => "trivial",
1788            AnomalyDetectionDifficulty::Easy => "easy",
1789            AnomalyDetectionDifficulty::Moderate => "moderate",
1790            AnomalyDetectionDifficulty::Hard => "hard",
1791            AnomalyDetectionDifficulty::Expert => "expert",
1792        }
1793    }
1794}
1795
1796/// Ground truth certainty level for anomaly labels.
1797///
1798/// Indicates how certain we are that the label is correct.
1799#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1800pub enum GroundTruthCertainty {
1801    /// Definitively known (injected anomaly with full provenance).
1802    #[default]
1803    Definite,
1804    /// Highly probable based on strong evidence.
1805    Probable,
1806    /// Possibly an anomaly based on indirect evidence.
1807    Possible,
1808}
1809
1810impl GroundTruthCertainty {
1811    /// Returns a certainty score (0.0-1.0).
1812    pub fn certainty_score(&self) -> f64 {
1813        match self {
1814            GroundTruthCertainty::Definite => 1.0,
1815            GroundTruthCertainty::Probable => 0.8,
1816            GroundTruthCertainty::Possible => 0.5,
1817        }
1818    }
1819
1820    /// Returns the name of this certainty level.
1821    pub fn name(&self) -> &'static str {
1822        match self {
1823            GroundTruthCertainty::Definite => "definite",
1824            GroundTruthCertainty::Probable => "probable",
1825            GroundTruthCertainty::Possible => "possible",
1826        }
1827    }
1828}
1829
1830/// Detection method classification.
1831///
1832/// Indicates which detection methods are recommended or effective for an anomaly.
1833#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1834pub enum DetectionMethod {
1835    /// Simple rule-based detection (thresholds, filters).
1836    RuleBased,
1837    /// Statistical analysis (distributions, outlier detection).
1838    Statistical,
1839    /// Machine learning models (classification, anomaly detection).
1840    MachineLearning,
1841    /// Graph-based analysis (network patterns, relationships).
1842    GraphBased,
1843    /// Manual forensic audit procedures.
1844    ForensicAudit,
1845    /// Combination of multiple methods.
1846    Hybrid,
1847}
1848
1849impl DetectionMethod {
1850    /// Returns the name of this detection method.
1851    pub fn name(&self) -> &'static str {
1852        match self {
1853            DetectionMethod::RuleBased => "rule_based",
1854            DetectionMethod::Statistical => "statistical",
1855            DetectionMethod::MachineLearning => "machine_learning",
1856            DetectionMethod::GraphBased => "graph_based",
1857            DetectionMethod::ForensicAudit => "forensic_audit",
1858            DetectionMethod::Hybrid => "hybrid",
1859        }
1860    }
1861
1862    /// Returns a description of this detection method.
1863    pub fn description(&self) -> &'static str {
1864        match self {
1865            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1866            DetectionMethod::Statistical => "Statistical distribution analysis",
1867            DetectionMethod::MachineLearning => "ML classification models",
1868            DetectionMethod::GraphBased => "Network and relationship analysis",
1869            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1870            DetectionMethod::Hybrid => "Combined multi-method approach",
1871        }
1872    }
1873}
1874
1875/// Extended anomaly label with comprehensive multi-dimensional classification.
1876///
1877/// This extends the base `EnhancedAnomalyLabel` with additional fields for
1878/// severity scoring, detection difficulty, recommended methods, and ground truth.
1879#[derive(Debug, Clone, Serialize, Deserialize)]
1880pub struct ExtendedAnomalyLabel {
1881    /// Base labeled anomaly.
1882    pub base: LabeledAnomaly,
1883    /// Enhanced category classification.
1884    pub category: AnomalyCategory,
1885    /// Structured severity assessment.
1886    pub severity: AnomalySeverity,
1887    /// Detection difficulty classification.
1888    pub detection_difficulty: AnomalyDetectionDifficulty,
1889    /// Recommended detection methods for this anomaly.
1890    pub recommended_methods: Vec<DetectionMethod>,
1891    /// Key indicators that should trigger detection.
1892    pub key_indicators: Vec<String>,
1893    /// Ground truth certainty level.
1894    pub ground_truth_certainty: GroundTruthCertainty,
1895    /// Contributing factors to confidence/severity.
1896    pub contributing_factors: Vec<ContributingFactor>,
1897    /// Related entity IDs (vendors, customers, employees, etc.).
1898    pub related_entity_ids: Vec<String>,
1899    /// Secondary categories for multi-label classification.
1900    pub secondary_categories: Vec<AnomalyCategory>,
1901    /// Scheme ID if part of a multi-stage fraud scheme.
1902    #[serde(default, skip_serializing_if = "Option::is_none")]
1903    pub scheme_id: Option<String>,
1904    /// Stage number within a scheme (1-indexed).
1905    #[serde(default, skip_serializing_if = "Option::is_none")]
1906    pub scheme_stage: Option<u32>,
1907    /// Whether this is a near-miss (suspicious but legitimate).
1908    #[serde(default)]
1909    pub is_near_miss: bool,
1910    /// Explanation if this is a near-miss.
1911    #[serde(default, skip_serializing_if = "Option::is_none")]
1912    pub near_miss_explanation: Option<String>,
1913}
1914
1915impl ExtendedAnomalyLabel {
1916    /// Creates an extended label from a base labeled anomaly.
1917    pub fn from_base(base: LabeledAnomaly) -> Self {
1918        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1919        let severity = AnomalySeverity {
1920            level: SeverityLevel::from_numeric(base.severity),
1921            score: base.severity as f64 / 5.0,
1922            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1923            is_material: false,
1924            materiality_threshold: None,
1925        };
1926
1927        Self {
1928            base,
1929            category,
1930            severity,
1931            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1932            recommended_methods: vec![DetectionMethod::RuleBased],
1933            key_indicators: Vec::new(),
1934            ground_truth_certainty: GroundTruthCertainty::Definite,
1935            contributing_factors: Vec::new(),
1936            related_entity_ids: Vec::new(),
1937            secondary_categories: Vec::new(),
1938            scheme_id: None,
1939            scheme_stage: None,
1940            is_near_miss: false,
1941            near_miss_explanation: None,
1942        }
1943    }
1944
1945    /// Sets the severity assessment.
1946    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1947        self.severity = severity;
1948        self
1949    }
1950
1951    /// Sets the detection difficulty.
1952    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1953        self.detection_difficulty = difficulty;
1954        self
1955    }
1956
1957    /// Adds a recommended detection method.
1958    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1959        if !self.recommended_methods.contains(&method) {
1960            self.recommended_methods.push(method);
1961        }
1962        self
1963    }
1964
1965    /// Sets the recommended detection methods.
1966    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1967        self.recommended_methods = methods;
1968        self
1969    }
1970
1971    /// Adds a key indicator.
1972    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1973        self.key_indicators.push(indicator.into());
1974        self
1975    }
1976
1977    /// Sets the ground truth certainty.
1978    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1979        self.ground_truth_certainty = certainty;
1980        self
1981    }
1982
1983    /// Adds a contributing factor.
1984    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1985        self.contributing_factors.push(factor);
1986        self
1987    }
1988
1989    /// Adds a related entity ID.
1990    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
1991        self.related_entity_ids.push(entity_id.into());
1992        self
1993    }
1994
1995    /// Adds a secondary category.
1996    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1997        if category != self.category && !self.secondary_categories.contains(&category) {
1998            self.secondary_categories.push(category);
1999        }
2000        self
2001    }
2002
2003    /// Sets scheme information.
2004    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2005        self.scheme_id = Some(scheme_id.into());
2006        self.scheme_stage = Some(stage);
2007        self
2008    }
2009
2010    /// Marks this as a near-miss with explanation.
2011    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2012        self.is_near_miss = true;
2013        self.near_miss_explanation = Some(explanation.into());
2014        self
2015    }
2016
2017    /// Converts to an extended feature vector for ML.
2018    ///
2019    /// Returns base features (15) + extended features (15) = 30 features.
2020    pub fn to_features(&self) -> Vec<f64> {
2021        let mut features = self.base.to_features();
2022
2023        // Extended features
2024        features.push(self.severity.score);
2025        features.push(self.severity.level.to_score());
2026        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2027        features.push(self.detection_difficulty.difficulty_score());
2028        features.push(self.detection_difficulty.expected_detection_rate());
2029        features.push(self.ground_truth_certainty.certainty_score());
2030        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2031        features.push(self.secondary_categories.len() as f64);
2032        features.push(self.contributing_factors.len() as f64);
2033        features.push(self.key_indicators.len() as f64);
2034        features.push(self.recommended_methods.len() as f64);
2035        features.push(self.related_entity_ids.len() as f64);
2036        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2037        features.push(self.scheme_stage.unwrap_or(0) as f64);
2038        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2039
2040        features
2041    }
2042
2043    /// Returns the number of features in the extended feature vector.
2044    pub fn feature_count() -> usize {
2045        30 // 15 base + 15 extended
2046    }
2047
2048    /// Returns feature names for the extended feature vector.
2049    pub fn feature_names() -> Vec<&'static str> {
2050        let mut names = LabeledAnomaly::feature_names();
2051        names.extend(vec![
2052            "severity_score",
2053            "severity_level_score",
2054            "is_material",
2055            "difficulty_score",
2056            "expected_detection_rate",
2057            "ground_truth_certainty",
2058            "category_ordinal",
2059            "secondary_category_count",
2060            "contributing_factor_count",
2061            "key_indicator_count",
2062            "recommended_method_count",
2063            "related_entity_count",
2064            "is_part_of_scheme",
2065            "scheme_stage",
2066            "is_near_miss",
2067        ]);
2068        names
2069    }
2070}
2071
2072// ============================================================================
2073// MULTI-STAGE FRAUD SCHEME TYPES
2074// ============================================================================
2075
2076/// Type of multi-stage fraud scheme.
2077#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2078pub enum SchemeType {
2079    /// Gradual embezzlement over time.
2080    GradualEmbezzlement,
2081    /// Revenue manipulation across periods.
2082    RevenueManipulation,
2083    /// Vendor kickback scheme.
2084    VendorKickback,
2085    /// Round-tripping funds through multiple entities.
2086    RoundTripping,
2087    /// Ghost employee scheme.
2088    GhostEmployee,
2089    /// Expense reimbursement fraud.
2090    ExpenseReimbursement,
2091    /// Inventory theft scheme.
2092    InventoryTheft,
2093    /// Custom scheme type.
2094    Custom,
2095}
2096
2097impl SchemeType {
2098    /// Returns the name of this scheme type.
2099    pub fn name(&self) -> &'static str {
2100        match self {
2101            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2102            SchemeType::RevenueManipulation => "revenue_manipulation",
2103            SchemeType::VendorKickback => "vendor_kickback",
2104            SchemeType::RoundTripping => "round_tripping",
2105            SchemeType::GhostEmployee => "ghost_employee",
2106            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2107            SchemeType::InventoryTheft => "inventory_theft",
2108            SchemeType::Custom => "custom",
2109        }
2110    }
2111
2112    /// Returns the typical number of stages for this scheme type.
2113    pub fn typical_stages(&self) -> u32 {
2114        match self {
2115            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2116            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2117            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2118            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2119            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2120            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2121            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2122            SchemeType::Custom => 4,
2123        }
2124    }
2125}
2126
2127/// Status of detection for a fraud scheme.
2128#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2129pub enum SchemeDetectionStatus {
2130    /// Scheme is undetected.
2131    #[default]
2132    Undetected,
2133    /// Under investigation but not confirmed.
2134    UnderInvestigation,
2135    /// Partially detected (some transactions flagged).
2136    PartiallyDetected,
2137    /// Fully detected and confirmed.
2138    FullyDetected,
2139}
2140
2141/// Reference to a transaction within a scheme.
2142#[derive(Debug, Clone, Serialize, Deserialize)]
2143pub struct SchemeTransactionRef {
2144    /// Document ID of the transaction.
2145    pub document_id: String,
2146    /// Transaction date.
2147    pub date: chrono::NaiveDate,
2148    /// Transaction amount.
2149    pub amount: Decimal,
2150    /// Stage this transaction belongs to.
2151    pub stage: u32,
2152    /// Anomaly ID if labeled.
2153    #[serde(default, skip_serializing_if = "Option::is_none")]
2154    pub anomaly_id: Option<String>,
2155}
2156
2157/// Concealment technique used in fraud.
2158#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2159pub enum ConcealmentTechnique {
2160    /// Document manipulation or forgery.
2161    DocumentManipulation,
2162    /// Circumventing approval processes.
2163    ApprovalCircumvention,
2164    /// Exploiting timing (period-end, holidays).
2165    TimingExploitation,
2166    /// Transaction splitting to avoid thresholds.
2167    TransactionSplitting,
2168    /// Account misclassification.
2169    AccountMisclassification,
2170    /// Collusion with other employees.
2171    Collusion,
2172    /// Data alteration or deletion.
2173    DataAlteration,
2174    /// Creating false documentation.
2175    FalseDocumentation,
2176}
2177
2178impl ConcealmentTechnique {
2179    /// Returns the difficulty bonus this technique adds.
2180    pub fn difficulty_bonus(&self) -> f64 {
2181        match self {
2182            ConcealmentTechnique::DocumentManipulation => 0.20,
2183            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2184            ConcealmentTechnique::TimingExploitation => 0.10,
2185            ConcealmentTechnique::TransactionSplitting => 0.15,
2186            ConcealmentTechnique::AccountMisclassification => 0.10,
2187            ConcealmentTechnique::Collusion => 0.25,
2188            ConcealmentTechnique::DataAlteration => 0.20,
2189            ConcealmentTechnique::FalseDocumentation => 0.15,
2190        }
2191    }
2192}
2193
2194// ============================================================================
2195// ACFE-ALIGNED FRAUD TAXONOMY
2196// ============================================================================
2197//
2198// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2199// Nations: Occupational Fraud Classification System. This taxonomy provides
2200// ACFE-aligned categories, schemes, and calibration data.
2201
2202/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
2203///
2204/// ACFE Report to the Nations statistics (typical):
2205/// - Asset Misappropriation: 86% of cases, $100k median loss
2206/// - Corruption: 33% of cases, $150k median loss
2207/// - Financial Statement Fraud: 10% of cases, $954k median loss
2208///
2209/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
2210#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2211pub enum AcfeFraudCategory {
2212    /// Theft of organizational assets (cash, inventory, equipment).
2213    /// Most common (86% of cases) but typically lowest median loss ($100k).
2214    #[default]
2215    AssetMisappropriation,
2216    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
2217    /// Medium frequency (33% of cases), medium median loss ($150k).
2218    Corruption,
2219    /// Intentional misstatement of financial statements.
2220    /// Least common (10% of cases) but highest median loss ($954k).
2221    FinancialStatementFraud,
2222}
2223
2224impl AcfeFraudCategory {
2225    /// Returns the name of this category.
2226    pub fn name(&self) -> &'static str {
2227        match self {
2228            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2229            AcfeFraudCategory::Corruption => "corruption",
2230            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2231        }
2232    }
2233
2234    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2235    pub fn typical_occurrence_rate(&self) -> f64 {
2236        match self {
2237            AcfeFraudCategory::AssetMisappropriation => 0.86,
2238            AcfeFraudCategory::Corruption => 0.33,
2239            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2240        }
2241    }
2242
2243    /// Returns the typical median loss amount (from ACFE reports).
2244    pub fn typical_median_loss(&self) -> Decimal {
2245        match self {
2246            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2247            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2248            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2249        }
2250    }
2251
2252    /// Returns the typical detection time in months (from ACFE reports).
2253    pub fn typical_detection_months(&self) -> u32 {
2254        match self {
2255            AcfeFraudCategory::AssetMisappropriation => 12,
2256            AcfeFraudCategory::Corruption => 18,
2257            AcfeFraudCategory::FinancialStatementFraud => 24,
2258        }
2259    }
2260}
2261
2262/// Cash-based fraud schemes under Asset Misappropriation.
2263///
2264/// Organized according to the ACFE Fraud Tree:
2265/// - Theft of Cash on Hand
2266/// - Theft of Cash Receipts
2267/// - Fraudulent Disbursements
2268#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2269pub enum CashFraudScheme {
2270    // ========== Theft of Cash on Hand ==========
2271    /// Stealing cash from cash drawers or safes after it has been recorded.
2272    Larceny,
2273    /// Stealing cash before it is recorded in the books (intercepts receipts).
2274    Skimming,
2275
2276    // ========== Theft of Cash Receipts ==========
2277    /// Skimming from sales transactions before recording.
2278    SalesSkimming,
2279    /// Intercepting customer payments on accounts receivable.
2280    ReceivablesSkimming,
2281    /// Creating false refunds to pocket the difference.
2282    RefundSchemes,
2283
2284    // ========== Fraudulent Disbursements - Billing Schemes ==========
2285    /// Creating fictitious vendors to invoice and pay.
2286    ShellCompany,
2287    /// Manipulating payments to legitimate vendors for personal gain.
2288    NonAccompliceVendor,
2289    /// Using company funds for personal purchases.
2290    PersonalPurchases,
2291
2292    // ========== Fraudulent Disbursements - Payroll Schemes ==========
2293    /// Creating fake employees to collect wages.
2294    GhostEmployee,
2295    /// Falsifying hours worked, sales commissions, or salary rates.
2296    FalsifiedWages,
2297    /// Manipulating commission calculations.
2298    CommissionSchemes,
2299
2300    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
2301    /// Claiming non-business expenses as business expenses.
2302    MischaracterizedExpenses,
2303    /// Inflating legitimate expense amounts.
2304    OverstatedExpenses,
2305    /// Creating completely fictitious expenses.
2306    FictitiousExpenses,
2307
2308    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
2309    /// Forging the signature of an authorized check signer.
2310    ForgedMaker,
2311    /// Intercepting and altering the endorsement on legitimate checks.
2312    ForgedEndorsement,
2313    /// Altering the payee on a legitimate check.
2314    AlteredPayee,
2315    /// Authorized signer writing checks for personal benefit.
2316    AuthorizedMaker,
2317
2318    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
2319    /// Creating false voided transactions.
2320    FalseVoids,
2321    /// Processing fictitious refunds.
2322    FalseRefunds,
2323}
2324
2325impl CashFraudScheme {
2326    /// Returns the ACFE category this scheme belongs to.
2327    pub fn category(&self) -> AcfeFraudCategory {
2328        AcfeFraudCategory::AssetMisappropriation
2329    }
2330
2331    /// Returns the subcategory within the ACFE Fraud Tree.
2332    pub fn subcategory(&self) -> &'static str {
2333        match self {
2334            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2335            CashFraudScheme::SalesSkimming
2336            | CashFraudScheme::ReceivablesSkimming
2337            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2338            CashFraudScheme::ShellCompany
2339            | CashFraudScheme::NonAccompliceVendor
2340            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2341            CashFraudScheme::GhostEmployee
2342            | CashFraudScheme::FalsifiedWages
2343            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2344            CashFraudScheme::MischaracterizedExpenses
2345            | CashFraudScheme::OverstatedExpenses
2346            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2347            CashFraudScheme::ForgedMaker
2348            | CashFraudScheme::ForgedEndorsement
2349            | CashFraudScheme::AlteredPayee
2350            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2351            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2352        }
2353    }
2354
2355    /// Returns the typical severity (1-5) for this scheme.
2356    pub fn severity(&self) -> u8 {
2357        match self {
2358            // Lower severity - often small amounts, easier to detect
2359            CashFraudScheme::FalseVoids
2360            | CashFraudScheme::FalseRefunds
2361            | CashFraudScheme::MischaracterizedExpenses => 3,
2362            // Medium severity
2363            CashFraudScheme::OverstatedExpenses
2364            | CashFraudScheme::Skimming
2365            | CashFraudScheme::Larceny
2366            | CashFraudScheme::PersonalPurchases
2367            | CashFraudScheme::FalsifiedWages => 4,
2368            // Higher severity - larger amounts, harder to detect
2369            CashFraudScheme::ShellCompany
2370            | CashFraudScheme::GhostEmployee
2371            | CashFraudScheme::FictitiousExpenses
2372            | CashFraudScheme::ForgedMaker
2373            | CashFraudScheme::AuthorizedMaker => 5,
2374            _ => 4,
2375        }
2376    }
2377
2378    /// Returns the typical detection difficulty.
2379    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2380        match self {
2381            // Easy to detect with basic controls
2382            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2383                AnomalyDetectionDifficulty::Easy
2384            }
2385            // Moderate - requires reconciliation
2386            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2387                AnomalyDetectionDifficulty::Moderate
2388            }
2389            // Hard - requires sophisticated analysis
2390            CashFraudScheme::Skimming
2391            | CashFraudScheme::ShellCompany
2392            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2393            // Expert level
2394            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2395                AnomalyDetectionDifficulty::Expert
2396            }
2397            _ => AnomalyDetectionDifficulty::Moderate,
2398        }
2399    }
2400
2401    /// Returns all variants for iteration.
2402    pub fn all_variants() -> &'static [CashFraudScheme] {
2403        &[
2404            CashFraudScheme::Larceny,
2405            CashFraudScheme::Skimming,
2406            CashFraudScheme::SalesSkimming,
2407            CashFraudScheme::ReceivablesSkimming,
2408            CashFraudScheme::RefundSchemes,
2409            CashFraudScheme::ShellCompany,
2410            CashFraudScheme::NonAccompliceVendor,
2411            CashFraudScheme::PersonalPurchases,
2412            CashFraudScheme::GhostEmployee,
2413            CashFraudScheme::FalsifiedWages,
2414            CashFraudScheme::CommissionSchemes,
2415            CashFraudScheme::MischaracterizedExpenses,
2416            CashFraudScheme::OverstatedExpenses,
2417            CashFraudScheme::FictitiousExpenses,
2418            CashFraudScheme::ForgedMaker,
2419            CashFraudScheme::ForgedEndorsement,
2420            CashFraudScheme::AlteredPayee,
2421            CashFraudScheme::AuthorizedMaker,
2422            CashFraudScheme::FalseVoids,
2423            CashFraudScheme::FalseRefunds,
2424        ]
2425    }
2426}
2427
2428/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
2429#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2430pub enum AssetFraudScheme {
2431    // ========== Inventory Schemes ==========
2432    /// Misusing or converting inventory for personal benefit.
2433    InventoryMisuse,
2434    /// Stealing physical inventory items.
2435    InventoryTheft,
2436    /// Manipulating purchasing to facilitate theft.
2437    InventoryPurchasingScheme,
2438    /// Manipulating receiving/shipping to steal inventory.
2439    InventoryReceivingScheme,
2440
2441    // ========== Other Asset Schemes ==========
2442    /// Misusing company equipment or vehicles.
2443    EquipmentMisuse,
2444    /// Theft of company equipment, tools, or supplies.
2445    EquipmentTheft,
2446    /// Unauthorized access to or theft of intellectual property.
2447    IntellectualPropertyTheft,
2448    /// Using company time/resources for personal business.
2449    TimeTheft,
2450}
2451
2452impl AssetFraudScheme {
2453    /// Returns the ACFE category this scheme belongs to.
2454    pub fn category(&self) -> AcfeFraudCategory {
2455        AcfeFraudCategory::AssetMisappropriation
2456    }
2457
2458    /// Returns the subcategory within the ACFE Fraud Tree.
2459    pub fn subcategory(&self) -> &'static str {
2460        match self {
2461            AssetFraudScheme::InventoryMisuse
2462            | AssetFraudScheme::InventoryTheft
2463            | AssetFraudScheme::InventoryPurchasingScheme
2464            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2465            _ => "other_assets",
2466        }
2467    }
2468
2469    /// Returns the typical severity (1-5) for this scheme.
2470    pub fn severity(&self) -> u8 {
2471        match self {
2472            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2473            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2474            AssetFraudScheme::InventoryTheft
2475            | AssetFraudScheme::InventoryPurchasingScheme
2476            | AssetFraudScheme::InventoryReceivingScheme => 4,
2477            AssetFraudScheme::IntellectualPropertyTheft => 5,
2478        }
2479    }
2480}
2481
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// The variants are laid out in four groups, which
/// [`CorruptionScheme::subcategory`] reports as `"conflicts_of_interest"`,
/// `"bribery"`, `"illegal_gratuities"` and `"economic_extortion"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2516
2517impl CorruptionScheme {
2518    /// Returns the ACFE category this scheme belongs to.
2519    pub fn category(&self) -> AcfeFraudCategory {
2520        AcfeFraudCategory::Corruption
2521    }
2522
2523    /// Returns the subcategory within the ACFE Fraud Tree.
2524    pub fn subcategory(&self) -> &'static str {
2525        match self {
2526            CorruptionScheme::PurchasingConflict
2527            | CorruptionScheme::SalesConflict
2528            | CorruptionScheme::OutsideBusinessInterest
2529            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2530            CorruptionScheme::InvoiceKickback
2531            | CorruptionScheme::BidRigging
2532            | CorruptionScheme::CashBribery
2533            | CorruptionScheme::PublicOfficial => "bribery",
2534            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2535            CorruptionScheme::EconomicExtortion => "economic_extortion",
2536        }
2537    }
2538
2539    /// Returns the typical severity (1-5) for this scheme.
2540    pub fn severity(&self) -> u8 {
2541        match self {
2542            // Lower severity conflicts of interest
2543            CorruptionScheme::NepotismConflict => 3,
2544            // Medium severity
2545            CorruptionScheme::PurchasingConflict
2546            | CorruptionScheme::SalesConflict
2547            | CorruptionScheme::OutsideBusinessInterest
2548            | CorruptionScheme::IllegalGratuity => 4,
2549            // High severity - active corruption
2550            CorruptionScheme::InvoiceKickback
2551            | CorruptionScheme::BidRigging
2552            | CorruptionScheme::CashBribery
2553            | CorruptionScheme::EconomicExtortion => 5,
2554            // Highest severity - involves public officials
2555            CorruptionScheme::PublicOfficial => 5,
2556        }
2557    }
2558
2559    /// Returns the typical detection difficulty.
2560    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2561        match self {
2562            // Easier to detect with proper disclosure requirements
2563            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2564                AnomalyDetectionDifficulty::Moderate
2565            }
2566            // Hard - requires transaction pattern analysis
2567            CorruptionScheme::PurchasingConflict
2568            | CorruptionScheme::SalesConflict
2569            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2570            // Expert level - deliberate concealment
2571            CorruptionScheme::InvoiceKickback
2572            | CorruptionScheme::CashBribery
2573            | CorruptionScheme::PublicOfficial
2574            | CorruptionScheme::IllegalGratuity
2575            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2576        }
2577    }
2578
2579    /// Returns all variants for iteration.
2580    pub fn all_variants() -> &'static [CorruptionScheme] {
2581        &[
2582            CorruptionScheme::PurchasingConflict,
2583            CorruptionScheme::SalesConflict,
2584            CorruptionScheme::OutsideBusinessInterest,
2585            CorruptionScheme::NepotismConflict,
2586            CorruptionScheme::InvoiceKickback,
2587            CorruptionScheme::BidRigging,
2588            CorruptionScheme::CashBribery,
2589            CorruptionScheme::PublicOfficial,
2590            CorruptionScheme::IllegalGratuity,
2591            CorruptionScheme::EconomicExtortion,
2592        ]
2593    }
2594}
2595
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// The variants are laid out in an overstatement group and an understatement
/// group; [`FinancialStatementScheme::subcategory`] reports them as
/// `"overstatement"` and `"understatement"` respectively.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2632
2633impl FinancialStatementScheme {
2634    /// Returns the ACFE category this scheme belongs to.
2635    pub fn category(&self) -> AcfeFraudCategory {
2636        AcfeFraudCategory::FinancialStatementFraud
2637    }
2638
2639    /// Returns the subcategory within the ACFE Fraud Tree.
2640    pub fn subcategory(&self) -> &'static str {
2641        match self {
2642            FinancialStatementScheme::UnderstatedRevenues
2643            | FinancialStatementScheme::OverstatedExpenses
2644            | FinancialStatementScheme::OverstatedLiabilities
2645            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2646            _ => "overstatement",
2647        }
2648    }
2649
2650    /// Returns the typical severity (1-5) for this scheme.
2651    pub fn severity(&self) -> u8 {
2652        // All financial statement fraud is high severity
2653        5
2654    }
2655
2656    /// Returns the typical detection difficulty.
2657    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2658        match self {
2659            // Easier to detect with good analytics
2660            FinancialStatementScheme::ChannelStuffing
2661            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2662            // Hard - requires deep analysis
2663            FinancialStatementScheme::PrematureRevenue
2664            | FinancialStatementScheme::ImproperCapitalization
2665            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2666            // Expert level
2667            FinancialStatementScheme::FictitiousRevenues
2668            | FinancialStatementScheme::ConcealedLiabilities
2669            | FinancialStatementScheme::ImproperAssetValuations
2670            | FinancialStatementScheme::ImproperDisclosures
2671            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2672            _ => AnomalyDetectionDifficulty::Hard,
2673        }
2674    }
2675
2676    /// Returns all variants for iteration.
2677    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2678        &[
2679            FinancialStatementScheme::PrematureRevenue,
2680            FinancialStatementScheme::DelayedExpenses,
2681            FinancialStatementScheme::FictitiousRevenues,
2682            FinancialStatementScheme::ConcealedLiabilities,
2683            FinancialStatementScheme::ImproperAssetValuations,
2684            FinancialStatementScheme::ImproperDisclosures,
2685            FinancialStatementScheme::ChannelStuffing,
2686            FinancialStatementScheme::BillAndHold,
2687            FinancialStatementScheme::ImproperCapitalization,
2688            FinancialStatementScheme::UnderstatedRevenues,
2689            FinancialStatementScheme::OverstatedExpenses,
2690            FinancialStatementScheme::OverstatedLiabilities,
2691            FinancialStatementScheme::ImproperAssetWritedowns,
2692        ]
2693    }
2694}
2695
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the per-branch scheme enums so that callers can
/// handle any scheme through a single type. The accessor methods on this type
/// dispatch on the wrapped value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2708
2709impl AcfeScheme {
2710    /// Returns the ACFE category this scheme belongs to.
2711    pub fn category(&self) -> AcfeFraudCategory {
2712        match self {
2713            AcfeScheme::Cash(s) => s.category(),
2714            AcfeScheme::Asset(s) => s.category(),
2715            AcfeScheme::Corruption(s) => s.category(),
2716            AcfeScheme::FinancialStatement(s) => s.category(),
2717        }
2718    }
2719
2720    /// Returns the severity (1-5) for this scheme.
2721    pub fn severity(&self) -> u8 {
2722        match self {
2723            AcfeScheme::Cash(s) => s.severity(),
2724            AcfeScheme::Asset(s) => s.severity(),
2725            AcfeScheme::Corruption(s) => s.severity(),
2726            AcfeScheme::FinancialStatement(s) => s.severity(),
2727        }
2728    }
2729
2730    /// Returns the detection difficulty for this scheme.
2731    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2732        match self {
2733            AcfeScheme::Cash(s) => s.detection_difficulty(),
2734            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2735            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2736            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2737        }
2738    }
2739}
2740
/// How a fraud was detected (from ACFE statistics).
///
/// The share of cases attributed to each method is available through
/// [`AcfeDetectionMethod::typical_detection_rate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2769
2770impl AcfeDetectionMethod {
2771    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2772    pub fn typical_detection_rate(&self) -> f64 {
2773        match self {
2774            AcfeDetectionMethod::Tip => 0.42,
2775            AcfeDetectionMethod::InternalAudit => 0.16,
2776            AcfeDetectionMethod::ManagementReview => 0.12,
2777            AcfeDetectionMethod::ExternalAudit => 0.04,
2778            AcfeDetectionMethod::AccountReconciliation => 0.05,
2779            AcfeDetectionMethod::DocumentExamination => 0.04,
2780            AcfeDetectionMethod::ByAccident => 0.06,
2781            AcfeDetectionMethod::ItControls => 0.03,
2782            AcfeDetectionMethod::Surveillance => 0.02,
2783            AcfeDetectionMethod::Confession => 0.02,
2784            AcfeDetectionMethod::LawEnforcement => 0.01,
2785            AcfeDetectionMethod::Other => 0.03,
2786        }
2787    }
2788
2789    /// Returns all variants for iteration.
2790    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2791        &[
2792            AcfeDetectionMethod::Tip,
2793            AcfeDetectionMethod::InternalAudit,
2794            AcfeDetectionMethod::ManagementReview,
2795            AcfeDetectionMethod::ExternalAudit,
2796            AcfeDetectionMethod::AccountReconciliation,
2797            AcfeDetectionMethod::DocumentExamination,
2798            AcfeDetectionMethod::ByAccident,
2799            AcfeDetectionMethod::ItControls,
2800            AcfeDetectionMethod::Surveillance,
2801            AcfeDetectionMethod::Confession,
2802            AcfeDetectionMethod::LawEnforcement,
2803            AcfeDetectionMethod::Other,
2804        ]
2805    }
2806}
2807
/// Department/position of perpetrator (from ACFE statistics).
///
/// Per-department occurrence rates and median losses are exposed through
/// [`PerpetratorDepartment::typical_occurrence_rate`] and
/// [`PerpetratorDepartment::typical_median_loss`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
2836
2837impl PerpetratorDepartment {
2838    /// Returns the typical percentage of frauds by department (from ACFE reports).
2839    pub fn typical_occurrence_rate(&self) -> f64 {
2840        match self {
2841            PerpetratorDepartment::Accounting => 0.21,
2842            PerpetratorDepartment::Operations => 0.17,
2843            PerpetratorDepartment::Executive => 0.12,
2844            PerpetratorDepartment::Sales => 0.11,
2845            PerpetratorDepartment::CustomerService => 0.07,
2846            PerpetratorDepartment::Purchasing => 0.06,
2847            PerpetratorDepartment::It => 0.05,
2848            PerpetratorDepartment::HumanResources => 0.04,
2849            PerpetratorDepartment::Administrative => 0.04,
2850            PerpetratorDepartment::Warehouse => 0.03,
2851            PerpetratorDepartment::BoardOfDirectors => 0.02,
2852            PerpetratorDepartment::Other => 0.08,
2853        }
2854    }
2855
2856    /// Returns the typical median loss by perpetrator department.
2857    pub fn typical_median_loss(&self) -> Decimal {
2858        match self {
2859            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2860            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2861            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2862            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2863            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2864            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2865            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2866            _ => Decimal::new(80_000, 0),
2867        }
2868    }
2869}
2870
/// Perpetrator position level (from ACFE statistics).
///
/// Occurrence rates and median losses per level are exposed through
/// [`PerpetratorLevel::typical_occurrence_rate`] and
/// [`PerpetratorLevel::typical_median_loss`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
2881
2882impl PerpetratorLevel {
2883    /// Returns the typical percentage of frauds by position level.
2884    pub fn typical_occurrence_rate(&self) -> f64 {
2885        match self {
2886            PerpetratorLevel::Employee => 0.42,
2887            PerpetratorLevel::Manager => 0.36,
2888            PerpetratorLevel::OwnerExecutive => 0.22,
2889        }
2890    }
2891
2892    /// Returns the typical median loss by position level.
2893    pub fn typical_median_loss(&self) -> Decimal {
2894        match self {
2895            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2896            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2897            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2898        }
2899    }
2900}
2901
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// The `Default` implementation fills every field with typical values;
/// [`AcfeCalibration::validate`] checks `median_loss`,
/// `median_duration_months` and `collusion_rate` only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    ///
    /// Default keys are `"asset_misappropriation"`, `"corruption"` and
    /// `"financial_statement_fraud"`. The default rates add up to more than
    /// 1.0 — presumably because one case can span several categories
    /// (NOTE(review): confirm before normalizing).
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    ///
    /// Default keys are the lowercased `Debug` names of
    /// `AcfeDetectionMethod`, without separators (e.g. `"internalaudit"`).
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    ///
    /// Default keys are snake_case department names, with the smaller
    /// departments folded into `"other"`.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    ///
    /// Default keys are `"employee"`, `"manager"` and `"owner_executive"`.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Percentage of frauds involving collusion, as a fraction in `0.0..=1.0`.
    pub collusion_rate: f64,
}
2925
2926impl Default for AcfeCalibration {
2927    fn default() -> Self {
2928        let mut category_distribution = HashMap::new();
2929        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2930        category_distribution.insert("corruption".to_string(), 0.33);
2931        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2932
2933        let mut detection_method_distribution = HashMap::new();
2934        for method in AcfeDetectionMethod::all_variants() {
2935            detection_method_distribution.insert(
2936                format!("{method:?}").to_lowercase(),
2937                method.typical_detection_rate(),
2938            );
2939        }
2940
2941        let mut department_distribution = HashMap::new();
2942        department_distribution.insert("accounting".to_string(), 0.21);
2943        department_distribution.insert("operations".to_string(), 0.17);
2944        department_distribution.insert("executive".to_string(), 0.12);
2945        department_distribution.insert("sales".to_string(), 0.11);
2946        department_distribution.insert("customer_service".to_string(), 0.07);
2947        department_distribution.insert("purchasing".to_string(), 0.06);
2948        department_distribution.insert("other".to_string(), 0.26);
2949
2950        let mut level_distribution = HashMap::new();
2951        level_distribution.insert("employee".to_string(), 0.42);
2952        level_distribution.insert("manager".to_string(), 0.36);
2953        level_distribution.insert("owner_executive".to_string(), 0.22);
2954
2955        Self {
2956            median_loss: Decimal::new(117_000, 0),
2957            median_duration_months: 12,
2958            category_distribution,
2959            detection_method_distribution,
2960            department_distribution,
2961            level_distribution,
2962            avg_red_flags_per_case: 2.8,
2963            collusion_rate: 0.50,
2964        }
2965    }
2966}
2967
2968impl AcfeCalibration {
2969    /// Creates a new ACFE calibration with the given parameters.
2970    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2971        Self {
2972            median_loss,
2973            median_duration_months,
2974            ..Self::default()
2975        }
2976    }
2977
2978    /// Returns the median loss for a specific category.
2979    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2980        category.typical_median_loss()
2981    }
2982
2983    /// Returns the median duration for a specific category.
2984    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
2985        category.typical_detection_months()
2986    }
2987
2988    /// Validates the calibration data.
2989    pub fn validate(&self) -> Result<(), String> {
2990        if self.median_loss <= Decimal::ZERO {
2991            return Err("Median loss must be positive".to_string());
2992        }
2993        if self.median_duration_months == 0 {
2994            return Err("Median duration must be at least 1 month".to_string());
2995        }
2996        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
2997            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
2998        }
2999        Ok(())
3000    }
3001}
3002
/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
///
/// The fraud triangle is a model for explaining the factors that cause
/// someone to commit occupational fraud.
///
/// A triangle holds one pressure, any number of opportunity factors and one
/// rationalization; [`FraudTriangle::risk_score`] combines their weights.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudTriangle {
    /// Pressure or incentive to commit fraud.
    pub pressure: PressureType,
    /// Opportunity factors that enable fraud (may be empty).
    pub opportunities: Vec<OpportunityFactor>,
    /// Rationalization used to justify the fraud.
    pub rationalization: Rationalization,
}
3016
3017impl FraudTriangle {
3018    /// Creates a new fraud triangle.
3019    pub fn new(
3020        pressure: PressureType,
3021        opportunities: Vec<OpportunityFactor>,
3022        rationalization: Rationalization,
3023    ) -> Self {
3024        Self {
3025            pressure,
3026            opportunities,
3027            rationalization,
3028        }
3029    }
3030
3031    /// Returns a risk score based on the fraud triangle components.
3032    pub fn risk_score(&self) -> f64 {
3033        let pressure_score = self.pressure.risk_weight();
3034        let opportunity_score: f64 = self
3035            .opportunities
3036            .iter()
3037            .map(OpportunityFactor::risk_weight)
3038            .sum::<f64>()
3039            / self.opportunities.len().max(1) as f64;
3040        let rationalization_score = self.rationalization.risk_weight();
3041
3042        (pressure_score + opportunity_score + rationalization_score) / 3.0
3043    }
3044}
3045
/// Types of pressure/incentive that can lead to fraud.
///
/// Variants are grouped into financial and non-financial pressures. Each one
/// carries a risk weight, available through [`PressureType::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PressureType {
    // Financial Pressures
    /// Personal financial difficulties (debt, lifestyle beyond means).
    PersonalFinancialDifficulties,
    /// Pressure to meet financial targets/earnings expectations.
    FinancialTargets,
    /// Market or analyst expectations.
    MarketExpectations,
    /// Debt covenant compliance requirements.
    CovenantCompliance,
    /// Credit rating maintenance.
    CreditRatingMaintenance,
    /// Acquisition/merger valuation pressure.
    AcquisitionValuation,

    // Non-Financial Pressures
    /// Fear of job loss.
    JobSecurity,
    /// Pressure to maintain status or image.
    StatusMaintenance,
    /// Gambling addiction.
    GamblingAddiction,
    /// Substance abuse issues.
    SubstanceAbuse,
    /// Family pressure or obligations.
    FamilyPressure,
    /// Greed or desire for more.
    Greed,
}
3077
3078impl PressureType {
3079    /// Returns the risk weight (0.0-1.0) for this pressure type.
3080    pub fn risk_weight(&self) -> f64 {
3081        match self {
3082            PressureType::PersonalFinancialDifficulties => 0.80,
3083            PressureType::FinancialTargets => 0.75,
3084            PressureType::MarketExpectations => 0.70,
3085            PressureType::CovenantCompliance => 0.85,
3086            PressureType::CreditRatingMaintenance => 0.70,
3087            PressureType::AcquisitionValuation => 0.75,
3088            PressureType::JobSecurity => 0.65,
3089            PressureType::StatusMaintenance => 0.55,
3090            PressureType::GamblingAddiction => 0.90,
3091            PressureType::SubstanceAbuse => 0.85,
3092            PressureType::FamilyPressure => 0.60,
3093            PressureType::Greed => 0.70,
3094        }
3095    }
3096}
3097
/// Opportunity factors that enable fraud.
///
/// Each factor carries a risk weight, available through
/// [`OpportunityFactor::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OpportunityFactor {
    /// Weak internal controls.
    WeakInternalControls,
    /// Lack of segregation of duties.
    LackOfSegregation,
    /// Override capability.
    ManagementOverride,
    /// Complex or unusual transactions.
    ComplexTransactions,
    /// Related party transactions.
    RelatedPartyTransactions,
    /// Poor tone at the top.
    PoorToneAtTop,
    /// Inadequate supervision.
    InadequateSupervision,
    /// Access to assets without accountability.
    AssetAccess,
    /// Inadequate record keeping.
    PoorRecordKeeping,
    /// Failure to discipline fraud perpetrators.
    LackOfDiscipline,
    /// Lack of independent checks.
    LackOfIndependentChecks,
}
3124
3125impl OpportunityFactor {
3126    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3127    pub fn risk_weight(&self) -> f64 {
3128        match self {
3129            OpportunityFactor::WeakInternalControls => 0.85,
3130            OpportunityFactor::LackOfSegregation => 0.80,
3131            OpportunityFactor::ManagementOverride => 0.90,
3132            OpportunityFactor::ComplexTransactions => 0.70,
3133            OpportunityFactor::RelatedPartyTransactions => 0.75,
3134            OpportunityFactor::PoorToneAtTop => 0.85,
3135            OpportunityFactor::InadequateSupervision => 0.75,
3136            OpportunityFactor::AssetAccess => 0.70,
3137            OpportunityFactor::PoorRecordKeeping => 0.65,
3138            OpportunityFactor::LackOfDiscipline => 0.60,
3139            OpportunityFactor::LackOfIndependentChecks => 0.75,
3140        }
3141    }
3142}
3143
/// Rationalizations used by fraud perpetrators.
///
/// Each variant is documented with the typical phrase a perpetrator might
/// use. Risk weights are available through [`Rationalization::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rationalization {
    /// "I'm just borrowing; I'll pay it back."
    TemporaryBorrowing,
    /// "Everyone does it."
    EveryoneDoesIt,
    /// "It's for the good of the company."
    ForTheCompanyGood,
    /// "I deserve this; the company owes me."
    Entitlement,
    /// "I was just following orders."
    FollowingOrders,
    /// "They won't miss it; they have plenty."
    TheyWontMissIt,
    /// "I need it more than they do."
    NeedItMore,
    /// "It's not really stealing."
    NotReallyStealing,
    /// "I'm underpaid for what I do."
    Underpaid,
    /// "It's a victimless crime."
    VictimlessCrime,
}
3168
3169impl Rationalization {
3170    /// Returns the risk weight (0.0-1.0) for this rationalization.
3171    pub fn risk_weight(&self) -> f64 {
3172        match self {
3173            // More dangerous rationalizations
3174            Rationalization::Entitlement => 0.85,
3175            Rationalization::EveryoneDoesIt => 0.80,
3176            Rationalization::NotReallyStealing => 0.80,
3177            Rationalization::TheyWontMissIt => 0.75,
3178            // Medium risk
3179            Rationalization::Underpaid => 0.70,
3180            Rationalization::ForTheCompanyGood => 0.65,
3181            Rationalization::NeedItMore => 0.65,
3182            // Lower risk (still indicates fraud)
3183            Rationalization::TemporaryBorrowing => 0.60,
3184            Rationalization::FollowingOrders => 0.55,
3185            Rationalization::VictimlessCrime => 0.60,
3186        }
3187    }
3188}
3189
3190// ============================================================================
3191// NEAR-MISS TYPES
3192// ============================================================================
3193
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Each variant carries the details that make the case look suspicious,
/// so a label can record why it resembles an anomaly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Number of days between this transaction and the similar one.
        date_difference_days: u32,
        /// ID of the transaction this one resembles.
        similar_transaction_id: String,
    },
    /// Amount just below approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Fraction of the threshold reached (0.0-1.0).
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Type of legitimate pattern.
        pattern_type: LegitimatePatternType,
        /// Business justification.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Days until correction.
        correction_lag_days: u32,
        /// Correction document ID.
        correction_document_id: String,
    },
}
3226
/// Types of unusual but legitimate business patterns.
///
/// A short human-readable label for each variant is available through
/// [`LegitimatePatternType::description`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3247
3248impl LegitimatePatternType {
3249    /// Returns a description of this pattern type.
3250    pub fn description(&self) -> &'static str {
3251        match self {
3252            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3253            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3254            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3255            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3256            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3257            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3258            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3259            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3260        }
3261    }
3262}
3263
/// What might trigger a false positive for this near-miss.
///
/// Stored on [`NearMissLabel`] as `false_positive_trigger`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FalsePositiveTrigger {
    /// Amount is near a threshold.
    AmountNearThreshold,
    /// Timing is unusual.
    UnusualTiming,
    /// Similar to an existing transaction.
    SimilarTransaction,
    /// New counterparty.
    NewCounterparty,
    /// Unusual account combination.
    UnusualAccountCombination,
    /// Volume spike.
    VolumeSpike,
    /// Round amount.
    RoundAmount,
}
3282
/// Label for a near-miss case.
///
/// Ties a document to the near-miss pattern it exhibits, together with a
/// suspicion score, the likely false-positive trigger and the reason the
/// record is legitimate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearMissLabel {
    /// Document ID.
    pub document_id: String,
    /// The near-miss pattern.
    pub pattern: NearMissPattern,
    /// How suspicious it appears (0.0-1.0). [`NearMissLabel::new`] clamps the
    /// supplied value into this range.
    pub suspicion_score: f64,
    /// What would trigger a false positive.
    pub false_positive_trigger: FalsePositiveTrigger,
    /// Why this is actually legitimate.
    pub explanation: String,
}
3297
3298impl NearMissLabel {
3299    /// Creates a new near-miss label.
3300    pub fn new(
3301        document_id: impl Into<String>,
3302        pattern: NearMissPattern,
3303        suspicion_score: f64,
3304        trigger: FalsePositiveTrigger,
3305        explanation: impl Into<String>,
3306    ) -> Self {
3307        Self {
3308            document_id: document_id.into(),
3309            pattern,
3310            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3311            false_positive_trigger: trigger,
3312            explanation: explanation.into(),
3313        }
3314    }
3315}
3316
/// Configuration for anomaly rates.
///
/// `total_rate` is the overall share of anomalous records. The remaining
/// fields split the anomalies between categories; `validate` expects those
/// five category rates to sum to 1.0 (within 0.01) and `total_rate` to lie
/// between 0.0 and 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate (0.0 - 1.0).
    pub total_rate: f64,
    /// Fraud rate as proportion of anomalies.
    pub fraud_rate: f64,
    /// Error rate as proportion of anomalies.
    pub error_rate: f64,
    /// Process issue rate as proportion of anomalies.
    pub process_issue_rate: f64,
    /// Statistical anomaly rate as proportion of anomalies.
    pub statistical_rate: f64,
    /// Relational anomaly rate as proportion of anomalies.
    pub relational_rate: f64,
}
3333
3334impl Default for AnomalyRateConfig {
3335    fn default() -> Self {
3336        Self {
3337            total_rate: 0.02,         // 2% of transactions are anomalous
3338            fraud_rate: 0.25,         // 25% of anomalies are fraud
3339            error_rate: 0.35,         // 35% of anomalies are errors
3340            process_issue_rate: 0.20, // 20% are process issues
3341            statistical_rate: 0.15,   // 15% are statistical
3342            relational_rate: 0.05,    // 5% are relational
3343        }
3344    }
3345}
3346
3347impl AnomalyRateConfig {
3348    /// Validates that rates sum to approximately 1.0.
3349    pub fn validate(&self) -> Result<(), String> {
3350        let sum = self.fraud_rate
3351            + self.error_rate
3352            + self.process_issue_rate
3353            + self.statistical_rate
3354            + self.relational_rate;
3355
3356        if (sum - 1.0).abs() > 0.01 {
3357            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3358        }
3359
3360        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3361            return Err(format!(
3362                "Total rate must be between 0.0 and 1.0, got {}",
3363                self.total_rate
3364            ));
3365        }
3366
3367        Ok(())
3368    }
3369}
3370
3371#[cfg(test)]
3372#[allow(clippy::unwrap_used)]
3373mod tests {
3374    use super::*;
3375    use rust_decimal_macros::dec;
3376
3377    #[test]
3378    fn test_anomaly_type_category() {
3379        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3380        assert_eq!(fraud.category(), "Fraud");
3381        assert!(fraud.is_intentional());
3382
3383        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3384        assert_eq!(error.category(), "Error");
3385        assert!(!error.is_intentional());
3386    }
3387
3388    #[test]
3389    fn test_labeled_anomaly() {
3390        let anomaly = LabeledAnomaly::new(
3391            "ANO001".to_string(),
3392            AnomalyType::Fraud(FraudType::SelfApproval),
3393            "JE001".to_string(),
3394            "JE".to_string(),
3395            "1000".to_string(),
3396            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3397        )
3398        .with_description("User approved their own expense report")
3399        .with_related_entity("USER001");
3400
3401        assert_eq!(anomaly.severity, 3);
3402        assert!(anomaly.is_injected);
3403        assert_eq!(anomaly.related_entities.len(), 1);
3404    }
3405
3406    #[test]
3407    fn test_labeled_anomaly_with_provenance() {
3408        let anomaly = LabeledAnomaly::new(
3409            "ANO001".to_string(),
3410            AnomalyType::Fraud(FraudType::SelfApproval),
3411            "JE001".to_string(),
3412            "JE".to_string(),
3413            "1000".to_string(),
3414            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3415        )
3416        .with_run_id("run-123")
3417        .with_generation_seed(42)
3418        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3419        .with_structured_strategy(InjectionStrategy::SelfApproval {
3420            user_id: "USER001".to_string(),
3421        })
3422        .with_scenario("scenario-001")
3423        .with_original_document_hash("abc123");
3424
3425        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3426        assert_eq!(anomaly.generation_seed, Some(42));
3427        assert!(anomaly.causal_reason.is_some());
3428        assert!(anomaly.structured_strategy.is_some());
3429        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3430        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3431
3432        // Check that legacy injection_strategy is also set
3433        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3434    }
3435
3436    #[test]
3437    fn test_labeled_anomaly_derivation_chain() {
3438        let parent = LabeledAnomaly::new(
3439            "ANO001".to_string(),
3440            AnomalyType::Fraud(FraudType::DuplicatePayment),
3441            "JE001".to_string(),
3442            "JE".to_string(),
3443            "1000".to_string(),
3444            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3445        );
3446
3447        let child = LabeledAnomaly::new(
3448            "ANO002".to_string(),
3449            AnomalyType::Error(ErrorType::DuplicateEntry),
3450            "JE002".to_string(),
3451            "JE".to_string(),
3452            "1000".to_string(),
3453            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3454        )
3455        .with_parent_anomaly(&parent.anomaly_id);
3456
3457        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3458    }
3459
3460    #[test]
3461    fn test_injection_strategy_description() {
3462        let strategy = InjectionStrategy::AmountManipulation {
3463            original: dec!(1000),
3464            factor: 2.5,
3465        };
3466        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3467        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3468
3469        let strategy = InjectionStrategy::ThresholdAvoidance {
3470            threshold: dec!(10000),
3471            adjusted_amount: dec!(9999),
3472        };
3473        assert_eq!(
3474            strategy.description(),
3475            "Amount adjusted to avoid 10000 threshold"
3476        );
3477
3478        let strategy = InjectionStrategy::DateShift {
3479            days_shifted: -5,
3480            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3481        };
3482        assert_eq!(strategy.description(), "Date backdated by 5 days");
3483
3484        let strategy = InjectionStrategy::DateShift {
3485            days_shifted: 3,
3486            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3487        };
3488        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3489    }
3490
3491    #[test]
3492    fn test_causal_reason_variants() {
3493        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3494        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3495            assert!((base_rate - 0.02).abs() < 0.001);
3496        }
3497
3498        let reason = AnomalyCausalReason::TemporalPattern {
3499            pattern_name: "year_end_spike".to_string(),
3500        };
3501        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3502            assert_eq!(pattern_name, "year_end_spike");
3503        }
3504
3505        let reason = AnomalyCausalReason::ScenarioStep {
3506            scenario_type: "kickback".to_string(),
3507            step_number: 3,
3508        };
3509        if let AnomalyCausalReason::ScenarioStep {
3510            scenario_type,
3511            step_number,
3512        } = reason
3513        {
3514            assert_eq!(scenario_type, "kickback");
3515            assert_eq!(step_number, 3);
3516        }
3517    }
3518
3519    #[test]
3520    fn test_feature_vector_length() {
3521        let anomaly = LabeledAnomaly::new(
3522            "ANO001".to_string(),
3523            AnomalyType::Fraud(FraudType::SelfApproval),
3524            "JE001".to_string(),
3525            "JE".to_string(),
3526            "1000".to_string(),
3527            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3528        );
3529
3530        let features = anomaly.to_features();
3531        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3532        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3533    }
3534
3535    #[test]
3536    fn test_feature_vector_with_provenance() {
3537        let anomaly = LabeledAnomaly::new(
3538            "ANO001".to_string(),
3539            AnomalyType::Fraud(FraudType::SelfApproval),
3540            "JE001".to_string(),
3541            "JE".to_string(),
3542            "1000".to_string(),
3543            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3544        )
3545        .with_scenario("scenario-001")
3546        .with_parent_anomaly("ANO000");
3547
3548        let features = anomaly.to_features();
3549
3550        // Last two features should be 1.0 (has scenario, has parent)
3551        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3552        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3553    }
3554
3555    #[test]
3556    fn test_anomaly_summary() {
3557        let anomalies = vec![
3558            LabeledAnomaly::new(
3559                "ANO001".to_string(),
3560                AnomalyType::Fraud(FraudType::SelfApproval),
3561                "JE001".to_string(),
3562                "JE".to_string(),
3563                "1000".to_string(),
3564                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3565            ),
3566            LabeledAnomaly::new(
3567                "ANO002".to_string(),
3568                AnomalyType::Error(ErrorType::DuplicateEntry),
3569                "JE002".to_string(),
3570                "JE".to_string(),
3571                "1000".to_string(),
3572                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3573            ),
3574        ];
3575
3576        let summary = AnomalySummary::from_anomalies(&anomalies);
3577
3578        assert_eq!(summary.total_count, 2);
3579        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3580        assert_eq!(summary.by_category.get("Error"), Some(&1));
3581    }
3582
3583    #[test]
3584    fn test_rate_config_validation() {
3585        let config = AnomalyRateConfig::default();
3586        assert!(config.validate().is_ok());
3587
3588        let bad_config = AnomalyRateConfig {
3589            fraud_rate: 0.5,
3590            error_rate: 0.5,
3591            process_issue_rate: 0.5, // Sum > 1.0
3592            ..Default::default()
3593        };
3594        assert!(bad_config.validate().is_err());
3595    }
3596
3597    #[test]
3598    fn test_injection_strategy_serialization() {
3599        let strategy = InjectionStrategy::SoDViolation {
3600            duty1: "CreatePO".to_string(),
3601            duty2: "ApprovePO".to_string(),
3602            violating_user: "USER001".to_string(),
3603        };
3604
3605        let json = serde_json::to_string(&strategy).unwrap();
3606        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3607
3608        assert_eq!(strategy, deserialized);
3609    }
3610
3611    #[test]
3612    fn test_labeled_anomaly_serialization_with_provenance() {
3613        let anomaly = LabeledAnomaly::new(
3614            "ANO001".to_string(),
3615            AnomalyType::Fraud(FraudType::SelfApproval),
3616            "JE001".to_string(),
3617            "JE".to_string(),
3618            "1000".to_string(),
3619            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3620        )
3621        .with_run_id("run-123")
3622        .with_generation_seed(42)
3623        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3624
3625        let json = serde_json::to_string(&anomaly).unwrap();
3626        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3627
3628        assert_eq!(anomaly.run_id, deserialized.run_id);
3629        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3630    }
3631
3632    // ========================================
3633    // FR-003 ENHANCED TAXONOMY TESTS
3634    // ========================================
3635
3636    #[test]
3637    fn test_anomaly_category_from_anomaly_type() {
3638        // Fraud mappings
3639        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3640        assert_eq!(
3641            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3642            AnomalyCategory::FictitiousVendor
3643        );
3644
3645        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3646        assert_eq!(
3647            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3648            AnomalyCategory::VendorKickback
3649        );
3650
3651        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3652        assert_eq!(
3653            AnomalyCategory::from_anomaly_type(&fraud_structured),
3654            AnomalyCategory::StructuredTransaction
3655        );
3656
3657        // Error mappings
3658        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3659        assert_eq!(
3660            AnomalyCategory::from_anomaly_type(&error_duplicate),
3661            AnomalyCategory::DuplicatePayment
3662        );
3663
3664        // Process issue mappings
3665        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3666        assert_eq!(
3667            AnomalyCategory::from_anomaly_type(&process_skip),
3668            AnomalyCategory::MissingApproval
3669        );
3670
3671        // Relational mappings
3672        let relational_circular =
3673            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3674        assert_eq!(
3675            AnomalyCategory::from_anomaly_type(&relational_circular),
3676            AnomalyCategory::CircularFlow
3677        );
3678    }
3679
3680    #[test]
3681    fn test_anomaly_category_ordinal() {
3682        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3683        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3684        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3685    }
3686
3687    #[test]
3688    fn test_contributing_factor() {
3689        let factor = ContributingFactor::new(
3690            FactorType::AmountDeviation,
3691            15000.0,
3692            10000.0,
3693            true,
3694            0.5,
3695            "Amount exceeds threshold",
3696        );
3697
3698        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3699        assert_eq!(factor.value, 15000.0);
3700        assert_eq!(factor.threshold, 10000.0);
3701        assert!(factor.direction_greater);
3702
3703        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3704        let contribution = factor.contribution();
3705        assert!((contribution - 0.25).abs() < 0.01);
3706    }
3707
3708    #[test]
3709    fn test_contributing_factor_with_evidence() {
3710        let mut data = HashMap::new();
3711        data.insert("expected".to_string(), "10000".to_string());
3712        data.insert("actual".to_string(), "15000".to_string());
3713
3714        let factor = ContributingFactor::new(
3715            FactorType::AmountDeviation,
3716            15000.0,
3717            10000.0,
3718            true,
3719            0.5,
3720            "Amount deviation detected",
3721        )
3722        .with_evidence("transaction_history", data);
3723
3724        assert!(factor.evidence.is_some());
3725        let evidence = factor.evidence.unwrap();
3726        assert_eq!(evidence.source, "transaction_history");
3727        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3728    }
3729
3730    #[test]
3731    fn test_enhanced_anomaly_label() {
3732        let base = LabeledAnomaly::new(
3733            "ANO001".to_string(),
3734            AnomalyType::Fraud(FraudType::DuplicatePayment),
3735            "JE001".to_string(),
3736            "JE".to_string(),
3737            "1000".to_string(),
3738            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3739        );
3740
3741        let enhanced = EnhancedAnomalyLabel::from_base(base)
3742            .with_confidence(0.85)
3743            .with_severity(0.7)
3744            .with_factor(ContributingFactor::new(
3745                FactorType::DuplicateIndicator,
3746                1.0,
3747                0.5,
3748                true,
3749                0.4,
3750                "Duplicate payment detected",
3751            ))
3752            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3753
3754        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3755        assert_eq!(enhanced.enhanced_confidence, 0.85);
3756        assert_eq!(enhanced.enhanced_severity, 0.7);
3757        assert_eq!(enhanced.contributing_factors.len(), 1);
3758        assert_eq!(enhanced.secondary_categories.len(), 1);
3759    }
3760
3761    #[test]
3762    fn test_enhanced_anomaly_label_features() {
3763        let base = LabeledAnomaly::new(
3764            "ANO001".to_string(),
3765            AnomalyType::Fraud(FraudType::SelfApproval),
3766            "JE001".to_string(),
3767            "JE".to_string(),
3768            "1000".to_string(),
3769            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3770        );
3771
3772        let enhanced = EnhancedAnomalyLabel::from_base(base)
3773            .with_confidence(0.9)
3774            .with_severity(0.8)
3775            .with_factor(ContributingFactor::new(
3776                FactorType::ControlBypass,
3777                1.0,
3778                0.0,
3779                true,
3780                0.5,
3781                "Control bypass detected",
3782            ));
3783
3784        let features = enhanced.to_features();
3785
3786        // Should have 25 features (15 base + 10 enhanced)
3787        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3788        assert_eq!(features.len(), 25);
3789
3790        // Check enhanced confidence is in features
3791        assert_eq!(features[15], 0.9); // enhanced_confidence
3792
3793        // Check has_control_bypass flag
3794        assert_eq!(features[21], 1.0); // has_control_bypass
3795    }
3796
3797    #[test]
3798    fn test_enhanced_anomaly_label_feature_names() {
3799        let names = EnhancedAnomalyLabel::feature_names();
3800        assert_eq!(names.len(), 25);
3801        assert!(names.contains(&"enhanced_confidence"));
3802        assert!(names.contains(&"enhanced_severity"));
3803        assert!(names.contains(&"has_control_bypass"));
3804    }
3805
3806    #[test]
3807    fn test_factor_type_names() {
3808        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3809        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3810        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3811    }
3812
3813    #[test]
3814    fn test_anomaly_category_serialization() {
3815        let category = AnomalyCategory::CircularFlow;
3816        let json = serde_json::to_string(&category).unwrap();
3817        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3818        assert_eq!(category, deserialized);
3819
3820        let custom = AnomalyCategory::Custom("custom_type".to_string());
3821        let json = serde_json::to_string(&custom).unwrap();
3822        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3823        assert_eq!(custom, deserialized);
3824    }
3825
3826    #[test]
3827    fn test_enhanced_label_secondary_category_dedup() {
3828        let base = LabeledAnomaly::new(
3829            "ANO001".to_string(),
3830            AnomalyType::Fraud(FraudType::DuplicatePayment),
3831            "JE001".to_string(),
3832            "JE".to_string(),
3833            "1000".to_string(),
3834            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3835        );
3836
3837        let enhanced = EnhancedAnomalyLabel::from_base(base)
3838            // Try to add the primary category as secondary (should be ignored)
3839            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3840            // Add a valid secondary
3841            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3842            // Try to add duplicate secondary (should be ignored)
3843            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3844
3845        // Should only have 1 secondary category (TimingAnomaly)
3846        assert_eq!(enhanced.secondary_categories.len(), 1);
3847        assert_eq!(
3848            enhanced.secondary_categories[0],
3849            AnomalyCategory::TimingAnomaly
3850        );
3851    }
3852
3853    // ==========================================================================
3854    // Accounting Standards Fraud Type Tests
3855    // ==========================================================================
3856
3857    #[test]
3858    fn test_revenue_recognition_fraud_types() {
3859        // Test ASC 606/IFRS 15 related fraud types
3860        let fraud_types = [
3861            FraudType::ImproperRevenueRecognition,
3862            FraudType::ImproperPoAllocation,
3863            FraudType::VariableConsiderationManipulation,
3864            FraudType::ContractModificationMisstatement,
3865        ];
3866
3867        for fraud_type in fraud_types {
3868            let anomaly_type = AnomalyType::Fraud(fraud_type);
3869            assert_eq!(anomaly_type.category(), "Fraud");
3870            assert!(anomaly_type.is_intentional());
3871            assert!(anomaly_type.severity() >= 3);
3872        }
3873    }
3874
3875    #[test]
3876    fn test_lease_accounting_fraud_types() {
3877        // Test ASC 842/IFRS 16 related fraud types
3878        let fraud_types = [
3879            FraudType::LeaseClassificationManipulation,
3880            FraudType::OffBalanceSheetLease,
3881            FraudType::LeaseLiabilityUnderstatement,
3882            FraudType::RouAssetMisstatement,
3883        ];
3884
3885        for fraud_type in fraud_types {
3886            let anomaly_type = AnomalyType::Fraud(fraud_type);
3887            assert_eq!(anomaly_type.category(), "Fraud");
3888            assert!(anomaly_type.is_intentional());
3889            assert!(anomaly_type.severity() >= 3);
3890        }
3891
3892        // Off-balance sheet lease fraud should be high severity
3893        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3894    }
3895
3896    #[test]
3897    fn test_fair_value_fraud_types() {
3898        // Test ASC 820/IFRS 13 related fraud types
3899        let fraud_types = [
3900            FraudType::FairValueHierarchyManipulation,
3901            FraudType::Level3InputManipulation,
3902            FraudType::ValuationTechniqueManipulation,
3903        ];
3904
3905        for fraud_type in fraud_types {
3906            let anomaly_type = AnomalyType::Fraud(fraud_type);
3907            assert_eq!(anomaly_type.category(), "Fraud");
3908            assert!(anomaly_type.is_intentional());
3909            assert!(anomaly_type.severity() >= 4);
3910        }
3911
3912        // Level 3 manipulation is highest severity (unobservable inputs)
3913        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3914    }
3915
3916    #[test]
3917    fn test_impairment_fraud_types() {
3918        // Test ASC 360/IAS 36 related fraud types
3919        let fraud_types = [
3920            FraudType::DelayedImpairment,
3921            FraudType::ImpairmentTestAvoidance,
3922            FraudType::CashFlowProjectionManipulation,
3923            FraudType::ImproperImpairmentReversal,
3924        ];
3925
3926        for fraud_type in fraud_types {
3927            let anomaly_type = AnomalyType::Fraud(fraud_type);
3928            assert_eq!(anomaly_type.category(), "Fraud");
3929            assert!(anomaly_type.is_intentional());
3930            assert!(anomaly_type.severity() >= 3);
3931        }
3932
3933        // Cash flow manipulation has highest severity
3934        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3935    }
3936
3937    // ==========================================================================
3938    // Accounting Standards Error Type Tests
3939    // ==========================================================================
3940
3941    #[test]
3942    fn test_standards_error_types() {
3943        // Test non-fraudulent accounting standards errors
3944        let error_types = [
3945            ErrorType::RevenueTimingError,
3946            ErrorType::PoAllocationError,
3947            ErrorType::LeaseClassificationError,
3948            ErrorType::LeaseCalculationError,
3949            ErrorType::FairValueError,
3950            ErrorType::ImpairmentCalculationError,
3951            ErrorType::DiscountRateError,
3952            ErrorType::FrameworkApplicationError,
3953        ];
3954
3955        for error_type in error_types {
3956            let anomaly_type = AnomalyType::Error(error_type);
3957            assert_eq!(anomaly_type.category(), "Error");
3958            assert!(!anomaly_type.is_intentional());
3959            assert!(anomaly_type.severity() >= 3);
3960        }
3961    }
3962
3963    #[test]
3964    fn test_framework_application_error() {
3965        // Test IFRS vs GAAP confusion errors
3966        let error_type = ErrorType::FrameworkApplicationError;
3967        assert_eq!(error_type.severity(), 4);
3968
3969        let anomaly = LabeledAnomaly::new(
3970            "ERR001".to_string(),
3971            AnomalyType::Error(error_type),
3972            "JE100".to_string(),
3973            "JE".to_string(),
3974            "1000".to_string(),
3975            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3976        )
3977        .with_description("LIFO inventory method used under IFRS (not permitted)")
3978        .with_metadata("framework", "IFRS")
3979        .with_metadata("standard_violated", "IAS 2");
3980
3981        assert_eq!(anomaly.anomaly_type.category(), "Error");
3982        assert_eq!(
3983            anomaly.metadata.get("standard_violated"),
3984            Some(&"IAS 2".to_string())
3985        );
3986    }
3987
3988    #[test]
3989    fn test_standards_anomaly_serialization() {
3990        // Test that new fraud types serialize/deserialize correctly
3991        let fraud_types = [
3992            FraudType::ImproperRevenueRecognition,
3993            FraudType::LeaseClassificationManipulation,
3994            FraudType::FairValueHierarchyManipulation,
3995            FraudType::DelayedImpairment,
3996        ];
3997
3998        for fraud_type in fraud_types {
3999            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4000            let deserialized: FraudType =
4001                serde_json::from_str(&json).expect("Failed to deserialize");
4002            assert_eq!(fraud_type, deserialized);
4003        }
4004
4005        // Test error types
4006        let error_types = [
4007            ErrorType::RevenueTimingError,
4008            ErrorType::LeaseCalculationError,
4009            ErrorType::FairValueError,
4010            ErrorType::FrameworkApplicationError,
4011        ];
4012
4013        for error_type in error_types {
4014            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4015            let deserialized: ErrorType =
4016                serde_json::from_str(&json).expect("Failed to deserialize");
4017            assert_eq!(error_type, deserialized);
4018        }
4019    }
4020
4021    #[test]
4022    fn test_standards_labeled_anomaly() {
4023        // Test creating a labeled anomaly for a standards violation
4024        let anomaly = LabeledAnomaly::new(
4025            "STD001".to_string(),
4026            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4027            "CONTRACT-2024-001".to_string(),
4028            "Revenue".to_string(),
4029            "1000".to_string(),
4030            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4031        )
4032        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4033        .with_monetary_impact(dec!(500000))
4034        .with_metadata("standard", "ASC 606")
4035        .with_metadata("paragraph", "606-10-25-1")
4036        .with_metadata("contract_id", "C-2024-001")
4037        .with_related_entity("CONTRACT-2024-001")
4038        .with_related_entity("CUSTOMER-500");
4039
4040        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4041        assert!(anomaly.is_injected);
4042        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4043        assert_eq!(anomaly.related_entities.len(), 2);
4044        assert_eq!(
4045            anomaly.metadata.get("standard"),
4046            Some(&"ASC 606".to_string())
4047        );
4048    }
4049
4050    // ==========================================================================
4051    // Multi-Dimensional Labeling Tests
4052    // ==========================================================================
4053
/// Exercises the conversions on `SeverityLevel`: to and from the numeric
/// rank, from a floating-point score, and back to a score.
#[test]
fn test_severity_level() {
    // Level -> numeric rank.
    let low_rank = SeverityLevel::Low.numeric();
    let critical_rank = SeverityLevel::Critical.numeric();
    assert_eq!(low_rank, 1);
    assert_eq!(critical_rank, 4);

    // Numeric rank -> level.
    let from_one = SeverityLevel::from_numeric(1);
    let from_four = SeverityLevel::from_numeric(4);
    assert_eq!(from_one, SeverityLevel::Low);
    assert_eq!(from_four, SeverityLevel::Critical);

    // Score -> level at both ends of the range.
    let from_low_score = SeverityLevel::from_score(0.1);
    let from_high_score = SeverityLevel::from_score(0.9);
    assert_eq!(from_low_score, SeverityLevel::Low);
    assert_eq!(from_high_score, SeverityLevel::Critical);

    // Level -> score, compared with a tolerance.
    let medium_score = SeverityLevel::Medium.to_score();
    assert!((medium_score - 0.375).abs() < 0.01);
}
4067
/// Checks that `AnomalySeverity` records its level and materiality threshold,
/// and that the `is_material` flag differs for an impact above versus below
/// the threshold.
#[test]
fn test_anomaly_severity() {
    // Impact of 50000 against a 10000 threshold: expected to be material.
    let above_threshold =
        AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));

    assert_eq!(above_threshold.level, SeverityLevel::High);
    assert_eq!(above_threshold.materiality_threshold, Some(dec!(10000)));
    assert!(above_threshold.is_material);

    // Impact of 5000 against the same threshold: expected not to be material.
    let below_threshold =
        AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));

    assert!(!below_threshold.is_material);
}
4082
/// Verifies expected detection rates, score-to-difficulty mapping and the
/// string name for `AnomalyDetectionDifficulty`.
#[test]
fn test_detection_difficulty() {
    // Detection rates at the two extremes, compared with a tolerance.
    let trivial_rate = AnomalyDetectionDifficulty::Trivial.expected_detection_rate();
    let expert_rate = AnomalyDetectionDifficulty::Expert.expected_detection_rate();
    assert!((trivial_rate - 0.99).abs() < 0.01);
    assert!((expert_rate - 0.15).abs() < 0.01);

    // A very low score maps to Trivial, a very high one to Expert.
    let easiest = AnomalyDetectionDifficulty::from_score(0.05);
    assert_eq!(easiest, AnomalyDetectionDifficulty::Trivial);

    let hardest = AnomalyDetectionDifficulty::from_score(0.90);
    assert_eq!(hardest, AnomalyDetectionDifficulty::Expert);

    let moderate_name = AnomalyDetectionDifficulty::Moderate.name();
    assert_eq!(moderate_name, "moderate");
}
4101
/// Checks the certainty score reported by each `GroundTruthCertainty` variant
/// exercised here.
#[test]
fn test_ground_truth_certainty() {
    let expectations = [
        (GroundTruthCertainty::Definite, 1.0),
        (GroundTruthCertainty::Probable, 0.8),
        (GroundTruthCertainty::Possible, 0.5),
    ];

    for (certainty, expected_score) in expectations {
        assert_eq!(certainty.certainty_score(), expected_score);
    }
}
4108
/// Checks the string names returned for two `DetectionMethod` variants.
#[test]
fn test_detection_method() {
    let expectations = [
        (DetectionMethod::RuleBased, "rule_based"),
        (DetectionMethod::MachineLearning, "machine_learning"),
    ];

    for (method, expected_name) in expectations {
        assert_eq!(method.name(), expected_name);
    }
}
4114
/// Builds an `ExtendedAnomalyLabel` through its full builder chain and checks
/// that severity, difficulty, methods, indicators and scheme data are stored.
#[test]
fn test_extended_anomaly_label() {
    let anomaly_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let base_label = LabeledAnomaly::new(
        "ANO001".to_string(),
        AnomalyType::Fraud(FraudType::FictitiousVendor),
        "JE001".to_string(),
        "JE".to_string(),
        "1000".to_string(),
        anomaly_date,
    )
    .with_monetary_impact(dec!(100000));

    let critical = AnomalySeverity::new(SeverityLevel::Critical, dec!(100000));

    let label = ExtendedAnomalyLabel::from_base(base_label)
        .with_severity(critical)
        .with_difficulty(AnomalyDetectionDifficulty::Hard)
        .with_method(DetectionMethod::GraphBased)
        .with_method(DetectionMethod::ForensicAudit)
        .with_indicator("New vendor with no history")
        .with_indicator("Large first transaction")
        .with_certainty(GroundTruthCertainty::Definite)
        .with_entity("V001")
        .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
        .with_scheme("SCHEME001", 2);

    assert_eq!(label.severity.level, SeverityLevel::Critical);
    assert_eq!(
        label.detection_difficulty,
        AnomalyDetectionDifficulty::Hard
    );

    // `from_base` contributes RuleBased; the two `with_method` calls above
    // add GraphBased and ForensicAudit, for three methods in total.
    assert_eq!(label.recommended_methods.len(), 3);
    assert_eq!(label.key_indicators.len(), 2);

    // Scheme id and stage come straight from `with_scheme`.
    assert_eq!(label.scheme_id.as_deref(), Some("SCHEME001"));
    assert_eq!(label.scheme_stage, Some(2));
}
4150
/// Checks the length of the feature vector produced by
/// `ExtendedAnomalyLabel::to_features` and the difficulty score stored in it.
#[test]
fn test_extended_anomaly_label_features() {
    // Position of difficulty_score within the feature vector.
    const DIFFICULTY_SCORE_IDX: usize = 18;

    let anomaly_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let base_label = LabeledAnomaly::new(
        "ANO001".to_string(),
        AnomalyType::Fraud(FraudType::SelfApproval),
        "JE001".to_string(),
        "JE".to_string(),
        "1000".to_string(),
        anomaly_date,
    );

    let label = ExtendedAnomalyLabel::from_base(base_label)
        .with_difficulty(AnomalyDetectionDifficulty::Hard);
    let feature_vec = label.to_features();

    // The vector length must agree with the advertised count, which is 30.
    assert_eq!(feature_vec.len(), ExtendedAnomalyLabel::feature_count());
    assert_eq!(feature_vec.len(), 30);

    // A Hard difficulty is expected to appear as roughly 0.75.
    assert!((feature_vec[DIFFICULTY_SCORE_IDX] - 0.75).abs() < 0.01);
}
4173
/// Marks an extended label as a near miss and checks that both the flag and
/// the explanation are populated.
#[test]
fn test_extended_label_near_miss() {
    let anomaly_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let base_label = LabeledAnomaly::new(
        "ANO001".to_string(),
        AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
        "JE001".to_string(),
        "JE".to_string(),
        "1000".to_string(),
        anomaly_date,
    );

    let near_miss = ExtendedAnomalyLabel::from_base(base_label)
        .as_near_miss("Year-end bonus payment, legitimately high");

    assert!(near_miss.is_near_miss);
    assert!(near_miss.near_miss_explanation.is_some());
}
4191
/// Checks the name and typical stage count reported by `SchemeType` variants.
#[test]
fn test_scheme_type() {
    let embezzlement_name = SchemeType::GradualEmbezzlement.name();
    assert_eq!(embezzlement_name, "gradual_embezzlement");

    let embezzlement_stages = SchemeType::GradualEmbezzlement.typical_stages();
    assert_eq!(embezzlement_stages, 4);

    let kickback_stages = SchemeType::VendorKickback.typical_stages();
    assert_eq!(kickback_stages, 4);
}
4201
/// Checks that collusion carries a positive difficulty bonus and that it
/// exceeds the bonus for timing exploitation.
#[test]
fn test_concealment_technique() {
    assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);

    let collusion_bonus = ConcealmentTechnique::Collusion.difficulty_bonus();
    let timing_bonus = ConcealmentTechnique::TimingExploitation.difficulty_bonus();
    assert!(collusion_bonus > timing_bonus);
}
4210
/// Constructs a `NearMissLabel` for a threshold-proximity pattern and checks
/// that the document id, suspicion score and trigger are stored as given.
#[test]
fn test_near_miss_label() {
    let pattern = NearMissPattern::ThresholdProximity {
        threshold: dec!(10000),
        proximity: 0.95,
    };

    let label = NearMissLabel::new(
        "JE001",
        pattern,
        0.7,
        FalsePositiveTrigger::AmountNearThreshold,
        "Transaction is 95% of threshold but business justified",
    );

    assert_eq!(label.document_id, "JE001");
    assert_eq!(label.suspicion_score, 0.7);
    assert_eq!(
        label.false_positive_trigger,
        FalsePositiveTrigger::AmountNearThreshold
    );
}
4231
/// Checks the human-readable descriptions of two `LegitimatePatternType`
/// variants.
#[test]
fn test_legitimate_pattern_type() {
    let expectations = [
        (
            LegitimatePatternType::YearEndBonus,
            "Year-end bonus payment",
        ),
        (
            LegitimatePatternType::InsuranceClaim,
            "Insurance claim reimbursement",
        ),
    ];

    for (pattern, expected_description) in expectations {
        assert_eq!(pattern.description(), expected_description);
    }
}
4243
/// Round-trips an `AnomalySeverity` and an `AnomalyDetectionDifficulty`
/// through JSON and checks that the decoded values match the originals.
#[test]
fn test_severity_detection_difficulty_serialization() {
    // Severity: only the level is compared after the round trip.
    let original_severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
    let severity_json =
        serde_json::to_string(&original_severity).expect("Failed to serialize");
    let decoded_severity: AnomalySeverity =
        serde_json::from_str(&severity_json).expect("Failed to deserialize");
    assert_eq!(original_severity.level, decoded_severity.level);

    // Difficulty: the whole value is compared.
    let original_difficulty = AnomalyDetectionDifficulty::Hard;
    let difficulty_json =
        serde_json::to_string(&original_difficulty).expect("Failed to serialize");
    let decoded_difficulty: AnomalyDetectionDifficulty =
        serde_json::from_str(&difficulty_json).expect("Failed to deserialize");
    assert_eq!(original_difficulty, decoded_difficulty);
}
4258
4259    // ========================================
4260    // ACFE Taxonomy Tests
4261    // ========================================
4262
/// Checks the name, occurrence rate, median loss and detection time that
/// `AcfeFraudCategory` reports for each of its three categories.
#[test]
fn test_acfe_fraud_category() {
    // Asset misappropriation.
    let misappropriation = AcfeFraudCategory::AssetMisappropriation;
    let misappropriation_rate = misappropriation.typical_occurrence_rate();
    assert_eq!(misappropriation.name(), "asset_misappropriation");
    assert!((misappropriation_rate - 0.86).abs() < 0.01);
    assert_eq!(
        misappropriation.typical_median_loss(),
        Decimal::new(100_000, 0)
    );
    assert_eq!(misappropriation.typical_detection_months(), 12);

    // Corruption.
    let corruption = AcfeFraudCategory::Corruption;
    let corruption_rate = corruption.typical_occurrence_rate();
    assert_eq!(corruption.name(), "corruption");
    assert!((corruption_rate - 0.33).abs() < 0.01);

    // Financial statement fraud.
    let statement_fraud = AcfeFraudCategory::FinancialStatementFraud;
    assert_eq!(
        statement_fraud.typical_median_loss(),
        Decimal::new(954_000, 0)
    );
    assert_eq!(statement_fraud.typical_detection_months(), 24);
}
4279
/// Checks category, subcategory, severity and detection difficulty for
/// selected `CashFraudScheme` variants, plus the total variant count.
#[test]
fn test_cash_fraud_scheme() {
    let shell_company = CashFraudScheme::ShellCompany;
    assert_eq!(
        shell_company.category(),
        AcfeFraudCategory::AssetMisappropriation
    );
    assert_eq!(shell_company.subcategory(), "billing_schemes");
    assert_eq!(shell_company.severity(), 5);
    assert_eq!(
        shell_company.detection_difficulty(),
        AnomalyDetectionDifficulty::Hard
    );

    let ghost_employee = CashFraudScheme::GhostEmployee;
    assert_eq!(ghost_employee.subcategory(), "payroll_schemes");
    assert_eq!(ghost_employee.severity(), 5);

    // `all_variants` is expected to list 20 schemes.
    let variant_count = CashFraudScheme::all_variants().len();
    assert_eq!(variant_count, 20);
}
4298
/// Checks category, subcategory and severity for two `AssetFraudScheme`
/// variants.
#[test]
fn test_asset_fraud_scheme() {
    let intellectual_property = AssetFraudScheme::IntellectualPropertyTheft;
    assert_eq!(
        intellectual_property.category(),
        AcfeFraudCategory::AssetMisappropriation
    );
    assert_eq!(intellectual_property.subcategory(), "other_assets");
    assert_eq!(intellectual_property.severity(), 5);

    let inventory = AssetFraudScheme::InventoryTheft;
    assert_eq!(inventory.subcategory(), "inventory");
    assert_eq!(inventory.severity(), 4);
}
4313
/// Checks category, subcategory, severity and detection difficulty for
/// selected `CorruptionScheme` variants, plus the total variant count.
#[test]
fn test_corruption_scheme() {
    let invoice_kickback = CorruptionScheme::InvoiceKickback;
    assert_eq!(invoice_kickback.category(), AcfeFraudCategory::Corruption);
    assert_eq!(invoice_kickback.subcategory(), "bribery");
    assert_eq!(invoice_kickback.severity(), 5);
    assert_eq!(
        invoice_kickback.detection_difficulty(),
        AnomalyDetectionDifficulty::Expert
    );

    let rigged_bid = CorruptionScheme::BidRigging;
    assert_eq!(rigged_bid.subcategory(), "bribery");
    assert_eq!(
        rigged_bid.detection_difficulty(),
        AnomalyDetectionDifficulty::Hard
    );

    let purchasing_conflict = CorruptionScheme::PurchasingConflict;
    assert_eq!(
        purchasing_conflict.subcategory(),
        "conflicts_of_interest"
    );

    // `all_variants` is expected to list 10 schemes.
    let variant_count = CorruptionScheme::all_variants().len();
    assert_eq!(variant_count, 10);
}
4338
/// Checks category, subcategory, severity and detection difficulty for
/// selected `FinancialStatementScheme` variants, plus the total variant count.
#[test]
fn test_financial_statement_scheme() {
    let fictitious_revenue = FinancialStatementScheme::FictitiousRevenues;
    assert_eq!(
        fictitious_revenue.category(),
        AcfeFraudCategory::FinancialStatementFraud
    );
    assert_eq!(fictitious_revenue.subcategory(), "overstatement");
    assert_eq!(fictitious_revenue.severity(), 5);
    assert_eq!(
        fictitious_revenue.detection_difficulty(),
        AnomalyDetectionDifficulty::Expert
    );

    let understated_revenue = FinancialStatementScheme::UnderstatedRevenues;
    assert_eq!(understated_revenue.subcategory(), "understatement");

    // `all_variants` is expected to list 13 schemes.
    let variant_count = FinancialStatementScheme::all_variants().len();
    assert_eq!(variant_count, 13);
}
4359
/// Checks that the unified `AcfeScheme` wrapper reports the category (and,
/// for the cash case, the severity) of the scheme it wraps.
#[test]
fn test_acfe_scheme_unified() {
    let wrapped_cash = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
    assert_eq!(
        wrapped_cash.category(),
        AcfeFraudCategory::AssetMisappropriation
    );
    assert_eq!(wrapped_cash.severity(), 5);

    let wrapped_corruption = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
    assert_eq!(
        wrapped_corruption.category(),
        AcfeFraudCategory::Corruption
    );

    let wrapped_statement =
        AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
    assert_eq!(
        wrapped_statement.category(),
        AcfeFraudCategory::FinancialStatementFraud
    );
}
4378
/// Checks the typical detection rate of three `AcfeDetectionMethod` variants
/// and the total variant count.
#[test]
fn test_acfe_detection_method() {
    let tip_rate = AcfeDetectionMethod::Tip.typical_detection_rate();
    assert!((tip_rate - 0.42).abs() < 0.01);

    let internal_audit_rate = AcfeDetectionMethod::InternalAudit.typical_detection_rate();
    assert!((internal_audit_rate - 0.16).abs() < 0.01);

    let external_audit_rate = AcfeDetectionMethod::ExternalAudit.typical_detection_rate();
    assert!((external_audit_rate - 0.04).abs() < 0.01);

    // `all_variants` is expected to list 12 methods.
    let variant_count = AcfeDetectionMethod::all_variants().len();
    assert_eq!(variant_count, 12);
}
4393
/// Checks the occurrence rate and median loss reported for two
/// `PerpetratorDepartment` variants.
#[test]
fn test_perpetrator_department() {
    let accounting_dept = PerpetratorDepartment::Accounting;
    let accounting_rate = accounting_dept.typical_occurrence_rate();
    assert!((accounting_rate - 0.21).abs() < 0.01);
    assert_eq!(
        accounting_dept.typical_median_loss(),
        Decimal::new(130_000, 0)
    );

    let executive_dept = PerpetratorDepartment::Executive;
    assert_eq!(
        executive_dept.typical_median_loss(),
        Decimal::new(600_000, 0)
    );
}
4403
/// Checks the occurrence rate and median loss reported for two
/// `PerpetratorLevel` variants.
#[test]
fn test_perpetrator_level() {
    let staff = PerpetratorLevel::Employee;
    let staff_rate = staff.typical_occurrence_rate();
    assert!((staff_rate - 0.42).abs() < 0.01);
    assert_eq!(staff.typical_median_loss(), Decimal::new(50_000, 0));

    let owner = PerpetratorLevel::OwnerExecutive;
    assert_eq!(owner.typical_median_loss(), Decimal::new(337_000, 0));
}
4413
/// Checks the default `AcfeCalibration` values, the custom constructor, and
/// that validation rejects a collusion rate of 1.5.
#[test]
fn test_acfe_calibration() {
    // Defaults must carry the expected values and pass validation.
    let defaults = AcfeCalibration::default();
    assert_eq!(defaults.median_loss, Decimal::new(117_000, 0));
    assert_eq!(defaults.median_duration_months, 12);
    assert!((defaults.collusion_rate - 0.50).abs() < 0.01);
    assert!(defaults.validate().is_ok());

    // The two-argument constructor stores median loss and duration as given.
    let customised = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
    assert_eq!(customised.median_loss, Decimal::new(200_000, 0));
    assert_eq!(customised.median_duration_months, 18);

    // Overriding only the collusion rate with 1.5 must fail validation.
    let invalid = AcfeCalibration {
        collusion_rate: 1.5,
        ..AcfeCalibration::default()
    };
    assert!(invalid.validate().is_err());
}
4434
/// Builds a `FraudTriangle` from a pressure, two opportunity factors and a
/// rationalization, then checks that its risk score lies in `[0, 1]` and
/// exceeds 0.5.
#[test]
fn test_fraud_triangle() {
    let opportunities = vec![
        OpportunityFactor::WeakInternalControls,
        OpportunityFactor::ManagementOverride,
    ];

    let score = FraudTriangle::new(
        PressureType::FinancialTargets,
        opportunities,
        Rationalization::ForTheCompanyGood,
    )
    .risk_score();

    // The score must stay within the unit interval.
    assert!((0.0..=1.0).contains(&score));
    // These particular components are expected to score relatively high.
    assert!(score > 0.5);
}
4452
/// Checks the risk weights reported for two `PressureType` variants.
#[test]
fn test_pressure_types() {
    let targets_weight = PressureType::FinancialTargets.risk_weight();
    assert!(targets_weight > 0.5);

    let gambling_weight = PressureType::GamblingAddiction.risk_weight();
    assert_eq!(gambling_weight, 0.90);
}
4461
/// Checks the risk weights reported for two `OpportunityFactor` variants.
#[test]
fn test_opportunity_factors() {
    let override_weight = OpportunityFactor::ManagementOverride.risk_weight();
    assert_eq!(override_weight, 0.90);

    let weak_controls_weight = OpportunityFactor::WeakInternalControls.risk_weight();
    assert!(weak_controls_weight > 0.8);
}
4470
/// Checks that entitlement carries a risk weight above 0.8 and that temporary
/// borrowing is weighted lower than entitlement.
#[test]
fn test_rationalizations() {
    let entitlement_weight = Rationalization::Entitlement.risk_weight();
    assert!(entitlement_weight > 0.8);

    let borrowing_weight = Rationalization::TemporaryBorrowing.risk_weight();
    assert!(borrowing_weight < Rationalization::Entitlement.risk_weight());
}
4479
/// Round-trips an `AcfeScheme` and the default `AcfeCalibration` through JSON
/// and checks that the decoded values match the originals.
#[test]
fn test_acfe_scheme_serialization() {
    // Scheme: the whole value is compared after the round trip.
    let original_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
    let scheme_json = serde_json::to_string(&original_scheme).expect("Failed to serialize");
    let decoded_scheme: AcfeScheme =
        serde_json::from_str(&scheme_json).expect("Failed to deserialize");
    assert_eq!(original_scheme, decoded_scheme);

    // Calibration: only the median loss is compared.
    let original_calibration = AcfeCalibration::default();
    let calibration_json =
        serde_json::to_string(&original_calibration).expect("Failed to serialize");
    let decoded_calibration: AcfeCalibration =
        serde_json::from_str(&calibration_json).expect("Failed to deserialize");
    assert_eq!(
        original_calibration.median_loss,
        decoded_calibration.median_loss
    );
}
4493}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs