datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// Causal reason explaining why an anomaly was injected.
///
/// This enables provenance tracking for understanding the "why" behind each anomaly.
/// Each variant carries the parameters of the selection rule that picked the
/// record. A value of this type is attached to a labeled anomaly through its
/// optional `causal_reason` field.
///
/// `Eq` and `Hash` cannot be derived here because `RandomRate` holds an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnomalyCausalReason {
    /// Injected due to random rate selection.
    RandomRate {
        /// Base rate used for selection.
        // NOTE(review): presumably a probability in [0, 1] — confirm with the injector.
        base_rate: f64,
    },
    /// Injected due to temporal pattern matching.
    TemporalPattern {
        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
        pattern_name: String,
    },
    /// Injected based on entity targeting rules.
    EntityTargeting {
        /// Type of entity targeted (e.g., "vendor", "user", "account").
        target_type: String,
        /// ID of the targeted entity.
        target_id: String,
    },
    /// Part of an anomaly cluster.
    ClusterMembership {
        /// ID of the cluster this anomaly belongs to.
        cluster_id: String,
    },
    /// Part of a multi-step scenario.
    ScenarioStep {
        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
        scenario_type: String,
        /// Step number within the scenario.
        // NOTE(review): whether numbering starts at 0 or 1 is not visible here.
        step_number: u32,
    },
    /// Injected based on data quality profile.
    DataQualityProfile {
        /// Profile name (e.g., "noisy", "legacy", "clean").
        profile: String,
    },
    /// Injected for ML training balance.
    MLTrainingBalance {
        /// Target class being balanced.
        target_class: String,
    },
}
60
/// Structured injection strategy with captured parameters.
///
/// Unlike the string-based `injection_strategy` field, this enum captures
/// the exact parameters used during injection for full reproducibility.
/// It is stored on a labeled anomaly through the optional
/// `structured_strategy` field.
///
/// `Eq` and `Hash` cannot be derived because `AmountManipulation` holds an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InjectionStrategy {
    /// Amount was manipulated by a factor.
    AmountManipulation {
        /// Original amount before manipulation.
        original: Decimal,
        /// Multiplication factor applied.
        factor: f64,
    },
    /// Amount adjusted to avoid a threshold.
    ThresholdAvoidance {
        /// Threshold being avoided.
        threshold: Decimal,
        /// Final amount after adjustment.
        adjusted_amount: Decimal,
    },
    /// Date was backdated or forward-dated.
    DateShift {
        /// Number of days shifted (negative = backdated).
        days_shifted: i32,
        /// Original date before shift.
        original_date: NaiveDate,
    },
    /// User approved their own transaction.
    SelfApproval {
        /// User who created and approved.
        user_id: String,
    },
    /// Segregation of duties violation.
    SoDViolation {
        /// First duty involved.
        duty1: String,
        /// Second duty involved.
        duty2: String,
        /// User who performed both duties.
        violating_user: String,
    },
    /// Exact duplicate of another document.
    ExactDuplicate {
        /// ID of the original document.
        original_doc_id: String,
    },
    /// Near-duplicate with small variations.
    NearDuplicate {
        /// ID of the original document.
        original_doc_id: String,
        /// Fields that were varied.
        varied_fields: Vec<String>,
    },
    /// Circular flow of funds/goods.
    CircularFlow {
        /// Chain of entities involved.
        // NOTE(review): ordering and whether the first entity is repeated at
        // the end are not specified here — confirm with the producer.
        entity_chain: Vec<String>,
    },
    /// Split transaction to avoid threshold.
    SplitTransaction {
        /// Original total amount.
        original_amount: Decimal,
        /// Number of splits.
        // NOTE(review): nothing here enforces `split_count == split_doc_ids.len()`.
        split_count: u32,
        /// IDs of the split documents.
        split_doc_ids: Vec<String>,
    },
    /// Round number manipulation.
    RoundNumbering {
        /// Original precise amount.
        original_amount: Decimal,
        /// Rounded amount.
        rounded_amount: Decimal,
    },
    /// Timing manipulation (weekend, after-hours, etc.).
    TimingManipulation {
        /// Type of timing issue.
        timing_type: String,
        /// Original timestamp, if one was recorded.
        original_time: Option<NaiveDateTime>,
    },
    /// Account misclassification.
    AccountMisclassification {
        /// Correct account.
        correct_account: String,
        /// Incorrect account used.
        incorrect_account: String,
    },
    /// Missing required field.
    MissingField {
        /// Name of the missing field.
        field_name: String,
    },
    /// Custom injection strategy.
    Custom {
        /// Strategy name.
        name: String,
        /// Additional free-form parameters as string key/value pairs.
        parameters: HashMap<String, String>,
    },
}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
/// Primary anomaly classification.
///
/// Each built-in category wraps a dedicated sub-type enum; `Custom` carries a
/// free-form label for anything outside the built-in taxonomy.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyType {
    /// Fraudulent activity.
    Fraud(FraudType),
    /// Data entry or processing error.
    Error(ErrorType),
    /// Process or control issue.
    ProcessIssue(ProcessIssueType),
    /// Statistical anomaly.
    Statistical(StatisticalAnomalyType),
    /// Relational/graph anomaly.
    Relational(RelationalAnomalyType),
    /// Custom anomaly type, identified by a caller-supplied name.
    Custom(String),
}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
/// Fraud types for detection training.
///
/// Variants are grouped by business area using the `//` section headers below.
/// Several variants are documented as aliases of one another; they are still
/// distinct enum values, so they compare unequal and render under different names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FraudType {
    // Journal Entry Fraud
    /// Fictitious journal entry with no business purpose.
    FictitiousEntry,
    /// Fictitious transaction (conceptual alias for `FictitiousEntry`; a separate variant).
    FictitiousTransaction,
    /// Round-dollar amounts suggesting manual manipulation.
    RoundDollarManipulation,
    /// Entry posted just below approval threshold.
    JustBelowThreshold,
    /// Revenue recognition manipulation.
    RevenueManipulation,
    /// Expense capitalization fraud.
    ImproperCapitalization,
    /// Improperly capitalizing expenses as assets.
    ExpenseCapitalization,
    /// Cookie jar reserves manipulation.
    ReserveManipulation,
    /// Round-tripping funds through suspense/clearing accounts.
    SuspenseAccountAbuse,
    /// Splitting transactions to stay below approval thresholds.
    SplitTransaction,
    /// Unusual timing (weekend, holiday, after-hours postings).
    TimingAnomaly,
    /// Posting to unauthorized accounts.
    UnauthorizedAccess,

    // Approval Fraud
    /// User approving their own request.
    SelfApproval,
    /// Approval beyond authorized limit.
    ExceededApprovalLimit,
    /// Segregation of duties violation.
    SegregationOfDutiesViolation,
    /// Approval by unauthorized user.
    UnauthorizedApproval,
    /// Collusion between approver and requester.
    CollusiveApproval,

    // Vendor/Payment Fraud
    /// Fictitious vendor.
    FictitiousVendor,
    /// Duplicate payment to vendor.
    DuplicatePayment,
    /// Payment to shell company.
    ShellCompanyPayment,
    /// Kickback scheme.
    Kickback,
    /// Kickback scheme (conceptual alias for `Kickback`; a separate variant).
    KickbackScheme,
    /// Invoice manipulation.
    InvoiceManipulation,

    // Asset Fraud
    /// Misappropriation of assets.
    AssetMisappropriation,
    /// Inventory theft.
    InventoryTheft,
    /// Ghost employee.
    GhostEmployee,

    // Financial Statement Fraud
    /// Premature revenue recognition.
    PrematureRevenue,
    /// Understated liabilities.
    UnderstatedLiabilities,
    /// Overstated assets.
    OverstatedAssets,
    /// Channel stuffing.
    ChannelStuffing,

    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
    /// Improper revenue recognition timing (ASC 606/IFRS 15).
    ImproperRevenueRecognition,
    /// Multiple performance obligations not properly separated.
    ImproperPoAllocation,
    /// Variable consideration not properly estimated.
    VariableConsiderationManipulation,
    /// Contract modifications not properly accounted for.
    ContractModificationMisstatement,

    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
    /// Lease classification manipulation (operating vs finance).
    LeaseClassificationManipulation,
    /// Off-balance sheet lease fraud.
    OffBalanceSheetLease,
    /// Lease liability understatement.
    LeaseLiabilityUnderstatement,
    /// Right-of-use (ROU) asset misstatement.
    RouAssetMisstatement,

    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
    /// Fair value hierarchy misclassification.
    FairValueHierarchyManipulation,
    /// Level 3 input manipulation.
    Level3InputManipulation,
    /// Valuation technique manipulation.
    ValuationTechniqueManipulation,

    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
    /// Delayed impairment recognition.
    DelayedImpairment,
    /// Improperly avoiding impairment testing.
    ImpairmentTestAvoidance,
    /// Cash flow projection manipulation for impairment.
    CashFlowProjectionManipulation,
    /// Improper impairment reversal (IFRS only).
    ImproperImpairmentReversal,

    // Sourcing/Procurement Fraud (S2C)
    /// Bid rigging or collusion among bidders.
    BidRigging,
    /// Contracts with phantom/shell vendors.
    PhantomVendorContract,
    /// Splitting contracts to avoid approval thresholds.
    SplitContractThreshold,
    /// Conflict of interest in sourcing decisions.
    ConflictOfInterestSourcing,

    // HR/Payroll Fraud (H2R)
    /// Ghost employee on payroll.
    GhostEmployeePayroll,
    /// Payroll inflation/unauthorized raises.
    PayrollInflation,
    /// Duplicate expense report submission.
    DuplicateExpenseReport,
    /// Fictitious expense claims.
    FictitiousExpense,
    /// Splitting expenses to avoid approval threshold.
    SplitExpenseToAvoidApproval,

    // O2C Fraud
    /// Revenue timing manipulation via quotes.
    RevenueTimingManipulation,
    /// Overriding quote prices without authorization.
    QuotePriceOverride,
}
438
439impl FraudType {
440    /// Returns severity level (1-5).
441    pub fn severity(&self) -> u8 {
442        match self {
443            FraudType::RoundDollarManipulation => 2,
444            FraudType::JustBelowThreshold => 3,
445            FraudType::SelfApproval => 3,
446            FraudType::ExceededApprovalLimit => 3,
447            FraudType::DuplicatePayment => 3,
448            FraudType::FictitiousEntry => 4,
449            FraudType::RevenueManipulation => 5,
450            FraudType::FictitiousVendor => 5,
451            FraudType::ShellCompanyPayment => 5,
452            FraudType::AssetMisappropriation => 5,
453            FraudType::SegregationOfDutiesViolation => 4,
454            FraudType::CollusiveApproval => 5,
455            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
456            FraudType::ImproperRevenueRecognition => 5,
457            FraudType::ImproperPoAllocation => 4,
458            FraudType::VariableConsiderationManipulation => 4,
459            FraudType::ContractModificationMisstatement => 3,
460            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
461            FraudType::LeaseClassificationManipulation => 4,
462            FraudType::OffBalanceSheetLease => 5,
463            FraudType::LeaseLiabilityUnderstatement => 4,
464            FraudType::RouAssetMisstatement => 3,
465            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
466            FraudType::FairValueHierarchyManipulation => 4,
467            FraudType::Level3InputManipulation => 5,
468            FraudType::ValuationTechniqueManipulation => 4,
469            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
470            FraudType::DelayedImpairment => 4,
471            FraudType::ImpairmentTestAvoidance => 4,
472            FraudType::CashFlowProjectionManipulation => 5,
473            FraudType::ImproperImpairmentReversal => 3,
474            _ => 4,
475        }
476    }
477}
478
/// Error types for error detection.
///
/// These describe unintentional mistakes, as opposed to the deliberate
/// manipulations listed in the fraud taxonomy. Variants are grouped by the
/// `//` section headers below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorType {
    // Data Entry Errors
    /// Duplicate document entry.
    DuplicateEntry,
    /// Reversed debit/credit amounts.
    ReversedAmount,
    /// Transposed digits in amount.
    TransposedDigits,
    /// Wrong decimal placement.
    DecimalError,
    /// Missing required field.
    MissingField,
    /// Invalid account code.
    InvalidAccount,

    // Timing Errors
    /// Posted to wrong period.
    WrongPeriod,
    /// Backdated entry.
    BackdatedEntry,
    /// Future-dated entry.
    FutureDatedEntry,
    /// Cutoff error.
    CutoffError,

    // Classification Errors
    /// Wrong account classification.
    MisclassifiedAccount,
    /// Wrong cost center.
    WrongCostCenter,
    /// Wrong company code.
    WrongCompanyCode,

    // Calculation Errors
    /// Unbalanced journal entry.
    UnbalancedEntry,
    /// Rounding error.
    RoundingError,
    /// Currency conversion error.
    CurrencyError,
    /// Tax calculation error.
    TaxCalculationError,

    // Accounting Standards Errors (Non-Fraudulent)
    /// Wrong revenue recognition timing (honest mistake).
    RevenueTimingError,
    /// Performance obligation allocation error.
    PoAllocationError,
    /// Lease classification error (operating vs finance).
    LeaseClassificationError,
    /// Lease calculation error (present value, amortization).
    LeaseCalculationError,
    /// Fair value measurement error.
    FairValueError,
    /// Impairment calculation error.
    ImpairmentCalculationError,
    /// Discount rate error.
    DiscountRateError,
    /// Framework application error (IFRS vs GAAP).
    FrameworkApplicationError,
}
542
543impl ErrorType {
544    /// Returns severity level (1-5).
545    pub fn severity(&self) -> u8 {
546        match self {
547            ErrorType::RoundingError => 1,
548            ErrorType::MissingField => 2,
549            ErrorType::TransposedDigits => 2,
550            ErrorType::DecimalError => 3,
551            ErrorType::DuplicateEntry => 3,
552            ErrorType::ReversedAmount => 3,
553            ErrorType::WrongPeriod => 4,
554            ErrorType::UnbalancedEntry => 5,
555            ErrorType::CurrencyError => 4,
556            // Accounting Standards Errors
557            ErrorType::RevenueTimingError => 4,
558            ErrorType::PoAllocationError => 3,
559            ErrorType::LeaseClassificationError => 3,
560            ErrorType::LeaseCalculationError => 3,
561            ErrorType::FairValueError => 4,
562            ErrorType::ImpairmentCalculationError => 4,
563            ErrorType::DiscountRateError => 3,
564            ErrorType::FrameworkApplicationError => 4,
565            _ => 3,
566        }
567    }
568}
569
/// Process issue types.
///
/// These cover weaknesses in approvals, timing, controls, and documentation
/// rather than incorrect amounts. Variants are grouped by the `//` section
/// headers below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProcessIssueType {
    // Approval Issues
    /// Approval skipped entirely.
    SkippedApproval,
    /// Late approval (after posting).
    LateApproval,
    /// Missing supporting documentation.
    MissingDocumentation,
    /// Incomplete approval chain.
    IncompleteApprovalChain,

    // Timing Issues
    /// Late posting.
    LatePosting,
    /// Posting outside business hours.
    AfterHoursPosting,
    /// Weekend/holiday posting.
    WeekendPosting,
    /// Rushed period-end posting.
    RushedPeriodEnd,

    // Control Issues
    /// Manual override of system control.
    ManualOverride,
    /// Unusual user access pattern.
    UnusualAccess,
    /// System bypass.
    SystemBypass,
    /// Batch processing anomaly.
    BatchAnomaly,

    // Documentation Issues
    /// Vague or missing description.
    VagueDescription,
    /// Changed after posting.
    PostFactoChange,
    /// Incomplete audit trail.
    IncompleteAuditTrail,

    // Sourcing/Procurement Issues (S2C)
    /// Purchasing outside of contracts (maverick spend).
    MaverickSpend,
    /// Purchasing against an expired contract.
    ExpiredContractPurchase,
    /// Overriding contracted price without authorization.
    ContractPriceOverride,
    /// Award given with only a single bid received.
    SingleBidAward,
    /// Bypassing supplier qualification requirements.
    QualificationBypass,

    // O2C Issues
    /// Converting an expired quote to a sales order.
    ExpiredQuoteConversion,
}
627
628impl ProcessIssueType {
629    /// Returns severity level (1-5).
630    pub fn severity(&self) -> u8 {
631        match self {
632            ProcessIssueType::VagueDescription => 1,
633            ProcessIssueType::LatePosting => 2,
634            ProcessIssueType::AfterHoursPosting => 2,
635            ProcessIssueType::WeekendPosting => 2,
636            ProcessIssueType::SkippedApproval => 4,
637            ProcessIssueType::ManualOverride => 4,
638            ProcessIssueType::SystemBypass => 5,
639            ProcessIssueType::IncompleteAuditTrail => 4,
640            _ => 3,
641        }
642    }
643}
644
/// Statistical anomaly types.
///
/// These describe deviations in amounts, frequency, trend, or distribution.
/// Variants are grouped by the `//` section headers below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatisticalAnomalyType {
    // Amount Anomalies
    /// Amount significantly above normal.
    UnusuallyHighAmount,
    /// Amount significantly below normal.
    UnusuallyLowAmount,
    /// Violates Benford's Law distribution.
    BenfordViolation,
    /// Exact duplicate amount (suspicious).
    ExactDuplicateAmount,
    /// Repeating pattern in amounts.
    RepeatingAmount,

    // Frequency Anomalies
    /// Unusual transaction frequency.
    UnusualFrequency,
    /// Burst of transactions.
    TransactionBurst,
    /// Unusual time of day.
    UnusualTiming,

    // Trend Anomalies
    /// Break in historical trend.
    TrendBreak,
    /// Sudden level shift.
    LevelShift,
    /// Seasonal pattern violation.
    SeasonalAnomaly,

    // Distribution Anomalies
    /// Outlier in distribution.
    StatisticalOutlier,
    /// Change in variance.
    VarianceChange,
    /// Distribution shift.
    DistributionShift,

    // Sourcing/Contract Anomalies
    /// Pattern of service-level agreement (SLA) breaches from a vendor.
    SlaBreachPattern,
    /// Contract with zero utilization.
    UnusedContract,

    // HR/Payroll Anomalies
    /// Anomalous overtime patterns.
    OvertimeAnomaly,
}
694
695impl StatisticalAnomalyType {
696    /// Returns severity level (1-5).
697    pub fn severity(&self) -> u8 {
698        match self {
699            StatisticalAnomalyType::UnusualTiming => 1,
700            StatisticalAnomalyType::UnusualFrequency => 2,
701            StatisticalAnomalyType::BenfordViolation => 2,
702            StatisticalAnomalyType::UnusuallyHighAmount => 3,
703            StatisticalAnomalyType::TrendBreak => 3,
704            StatisticalAnomalyType::TransactionBurst => 4,
705            StatisticalAnomalyType::ExactDuplicateAmount => 3,
706            _ => 3,
707        }
708    }
709}
710
/// Relational/graph anomaly types.
///
/// These describe anomalies in how entities relate to each other rather than
/// in a single record. Variants are grouped by the `//` section headers below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RelationalAnomalyType {
    // Transaction Pattern Anomalies
    /// Circular transaction pattern.
    CircularTransaction,
    /// Unusual account combination.
    UnusualAccountPair,
    /// New trading partner.
    NewCounterparty,
    /// Dormant account suddenly active.
    DormantAccountActivity,

    // Network Anomalies
    /// Unusual network centrality.
    CentralityAnomaly,
    /// Isolated transaction cluster.
    IsolatedCluster,
    /// Bridge node anomaly.
    BridgeNodeAnomaly,
    /// Community structure change.
    CommunityAnomaly,

    // Relationship Anomalies
    /// Missing expected relationship.
    MissingRelationship,
    /// Unexpected relationship.
    UnexpectedRelationship,
    /// Relationship strength change.
    RelationshipStrengthChange,

    // Intercompany Anomalies
    /// Unmatched intercompany transaction.
    UnmatchedIntercompany,
    /// Circular intercompany flow.
    CircularIntercompany,
    /// Transfer pricing anomaly.
    TransferPricingAnomaly,
}
750
751impl RelationalAnomalyType {
752    /// Returns severity level (1-5).
753    pub fn severity(&self) -> u8 {
754        match self {
755            RelationalAnomalyType::NewCounterparty => 1,
756            RelationalAnomalyType::DormantAccountActivity => 2,
757            RelationalAnomalyType::UnusualAccountPair => 2,
758            RelationalAnomalyType::CircularTransaction => 4,
759            RelationalAnomalyType::CircularIntercompany => 4,
760            RelationalAnomalyType::TransferPricingAnomaly => 4,
761            RelationalAnomalyType::UnmatchedIntercompany => 3,
762            _ => 3,
763        }
764    }
765}
766
/// A labeled anomaly for supervised learning.
///
/// The first group of fields describes the anomaly itself; the provenance
/// fields further down are all optional, default to empty on deserialization,
/// and are omitted from serialized output when empty (see the `serde`
/// attributes on each of them).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabeledAnomaly {
    /// Unique anomaly identifier.
    pub anomaly_id: String,
    /// Type of anomaly.
    pub anomaly_type: AnomalyType,
    /// Document or entity that contains the anomaly.
    pub document_id: String,
    /// Document type (JE, PO, Invoice, etc.).
    pub document_type: String,
    /// Company code.
    pub company_code: String,
    /// Date the anomaly occurred.
    pub anomaly_date: NaiveDate,
    /// Timestamp when detected/injected.
    // This is a naive timestamp: it carries no time-zone information.
    pub detection_timestamp: NaiveDateTime,
    /// Confidence score (0.0 - 1.0) for injected anomalies.
    // NOTE(review): the range is documented but not enforced by the type.
    pub confidence: f64,
    /// Severity (1-5).
    // NOTE(review): the range is documented but not enforced by the type.
    pub severity: u8,
    /// Description of the anomaly.
    pub description: String,
    /// Related entities (user IDs, account codes, etc.).
    pub related_entities: Vec<String>,
    /// Monetary impact if applicable.
    pub monetary_impact: Option<Decimal>,
    /// Additional metadata as free-form string key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Whether this was injected (true) or naturally occurring (false).
    pub is_injected: bool,
    /// Injection strategy used (if injected) - legacy string field.
    pub injection_strategy: Option<String>,
    /// Cluster ID if part of an anomaly cluster.
    pub cluster_id: Option<String>,

    // ========================================
    // PROVENANCE TRACKING FIELDS (Phase 1.2)
    // ========================================
    /// Hash of the original document before modification.
    /// Enables tracking what the document looked like pre-injection.
    // NOTE(review): the hash algorithm and encoding are not specified here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_document_hash: Option<String>,

    /// Causal reason explaining why this anomaly was injected.
    /// Provides "why" tracking for each anomaly.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_reason: Option<AnomalyCausalReason>,

    /// Structured injection strategy with parameters.
    /// More detailed than the legacy string-based injection_strategy field.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured_strategy: Option<InjectionStrategy>,

    /// Parent anomaly ID if this was derived from another anomaly.
    /// Enables anomaly transformation chains.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_anomaly_id: Option<String>,

    /// Child anomaly IDs that were derived from this anomaly.
    /// Omitted from serialized output when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub child_anomaly_ids: Vec<String>,

    /// Scenario ID if this anomaly is part of a multi-step scenario.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scenario_id: Option<String>,

    /// Generation run ID that produced this anomaly.
    /// Enables tracing anomalies back to their generation run.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,

    /// Seed used for RNG during generation.
    /// Enables reproducibility.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation_seed: Option<u64>,
}
844
845impl LabeledAnomaly {
846    /// Creates a new labeled anomaly.
847    pub fn new(
848        anomaly_id: String,
849        anomaly_type: AnomalyType,
850        document_id: String,
851        document_type: String,
852        company_code: String,
853        anomaly_date: NaiveDate,
854    ) -> Self {
855        let severity = anomaly_type.severity();
856        let description = format!(
857            "{} - {} in document {}",
858            anomaly_type.category(),
859            anomaly_type.type_name(),
860            document_id
861        );
862
863        Self {
864            anomaly_id,
865            anomaly_type,
866            document_id,
867            document_type,
868            company_code,
869            anomaly_date,
870            detection_timestamp: chrono::Local::now().naive_local(),
871            confidence: 1.0,
872            severity,
873            description,
874            related_entities: Vec::new(),
875            monetary_impact: None,
876            metadata: HashMap::new(),
877            is_injected: true,
878            injection_strategy: None,
879            cluster_id: None,
880            // Provenance fields
881            original_document_hash: None,
882            causal_reason: None,
883            structured_strategy: None,
884            parent_anomaly_id: None,
885            child_anomaly_ids: Vec::new(),
886            scenario_id: None,
887            run_id: None,
888            generation_seed: None,
889        }
890    }
891
892    /// Sets the description.
893    pub fn with_description(mut self, description: &str) -> Self {
894        self.description = description.to_string();
895        self
896    }
897
898    /// Sets the monetary impact.
899    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
900        self.monetary_impact = Some(impact);
901        self
902    }
903
904    /// Adds a related entity.
905    pub fn with_related_entity(mut self, entity: &str) -> Self {
906        self.related_entities.push(entity.to_string());
907        self
908    }
909
910    /// Adds metadata.
911    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
912        self.metadata.insert(key.to_string(), value.to_string());
913        self
914    }
915
916    /// Sets the injection strategy (legacy string).
917    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
918        self.injection_strategy = Some(strategy.to_string());
919        self
920    }
921
922    /// Sets the cluster ID.
923    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
924        self.cluster_id = Some(cluster_id.to_string());
925        self
926    }
927
928    // ========================================
929    // PROVENANCE BUILDER METHODS (Phase 1.2)
930    // ========================================
931
932    /// Sets the original document hash for provenance tracking.
933    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
934        self.original_document_hash = Some(hash.to_string());
935        self
936    }
937
938    /// Sets the causal reason for this anomaly.
939    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
940        self.causal_reason = Some(reason);
941        self
942    }
943
944    /// Sets the structured injection strategy.
945    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
946        // Also set the legacy string field for backward compatibility
947        self.injection_strategy = Some(strategy.strategy_type().to_string());
948        self.structured_strategy = Some(strategy);
949        self
950    }
951
952    /// Sets the parent anomaly ID (for anomaly derivation chains).
953    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
954        self.parent_anomaly_id = Some(parent_id.to_string());
955        self
956    }
957
958    /// Adds a child anomaly ID.
959    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
960        self.child_anomaly_ids.push(child_id.to_string());
961        self
962    }
963
964    /// Sets the scenario ID for multi-step scenario tracking.
965    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
966        self.scenario_id = Some(scenario_id.to_string());
967        self
968    }
969
970    /// Sets the generation run ID.
971    pub fn with_run_id(mut self, run_id: &str) -> Self {
972        self.run_id = Some(run_id.to_string());
973        self
974    }
975
976    /// Sets the generation seed for reproducibility.
977    pub fn with_generation_seed(mut self, seed: u64) -> Self {
978        self.generation_seed = Some(seed);
979        self
980    }
981
982    /// Sets multiple provenance fields at once for convenience.
983    pub fn with_provenance(
984        mut self,
985        run_id: Option<&str>,
986        seed: Option<u64>,
987        causal_reason: Option<AnomalyCausalReason>,
988    ) -> Self {
989        if let Some(id) = run_id {
990            self.run_id = Some(id.to_string());
991        }
992        self.generation_seed = seed;
993        self.causal_reason = causal_reason;
994        self
995    }
996
997    /// Converts to a feature vector for ML.
998    ///
999    /// Returns a vector of 15 features:
1000    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1001    /// - 1 feature: Severity (normalized 0-1)
1002    /// - 1 feature: Confidence
1003    /// - 1 feature: Has monetary impact (0/1)
1004    /// - 1 feature: Monetary impact (log-scaled)
1005    /// - 1 feature: Is intentional (0/1)
1006    /// - 1 feature: Number of related entities
1007    /// - 1 feature: Is part of cluster (0/1)
1008    /// - 1 feature: Is part of scenario (0/1)
1009    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1010    pub fn to_features(&self) -> Vec<f64> {
1011        let mut features = Vec::new();
1012
1013        // Category one-hot encoding
1014        let categories = [
1015            "Fraud",
1016            "Error",
1017            "ProcessIssue",
1018            "Statistical",
1019            "Relational",
1020            "Custom",
1021        ];
1022        for cat in &categories {
1023            features.push(if self.anomaly_type.category() == *cat {
1024                1.0
1025            } else {
1026                0.0
1027            });
1028        }
1029
1030        // Severity (normalized)
1031        features.push(self.severity as f64 / 5.0);
1032
1033        // Confidence
1034        features.push(self.confidence);
1035
1036        // Has monetary impact
1037        features.push(if self.monetary_impact.is_some() {
1038            1.0
1039        } else {
1040            0.0
1041        });
1042
1043        // Monetary impact (log-scaled)
1044        if let Some(impact) = self.monetary_impact {
1045            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1046            features.push((impact_f64.abs() + 1.0).ln());
1047        } else {
1048            features.push(0.0);
1049        }
1050
1051        // Is intentional
1052        features.push(if self.anomaly_type.is_intentional() {
1053            1.0
1054        } else {
1055            0.0
1056        });
1057
1058        // Number of related entities
1059        features.push(self.related_entities.len() as f64);
1060
1061        // Is part of cluster
1062        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1063
1064        // Provenance features
1065        // Is part of scenario
1066        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1067
1068        // Has parent anomaly (indicates this is a derived anomaly)
1069        features.push(if self.parent_anomaly_id.is_some() {
1070            1.0
1071        } else {
1072            0.0
1073        });
1074
1075        features
1076    }
1077
1078    /// Returns the number of features in the feature vector.
1079    pub fn feature_count() -> usize {
1080        15 // 6 category + 9 other features
1081    }
1082
1083    /// Returns feature names for documentation/ML metadata.
1084    pub fn feature_names() -> Vec<&'static str> {
1085        vec![
1086            "category_fraud",
1087            "category_error",
1088            "category_process_issue",
1089            "category_statistical",
1090            "category_relational",
1091            "category_custom",
1092            "severity_normalized",
1093            "confidence",
1094            "has_monetary_impact",
1095            "monetary_impact_log",
1096            "is_intentional",
1097            "related_entity_count",
1098            "is_clustered",
1099            "is_scenario_part",
1100            "is_derived",
1101        ]
1102    }
1103}
1104
1105/// Summary of anomalies for reporting.
1106#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1107pub struct AnomalySummary {
1108    /// Total anomaly count.
1109    pub total_count: usize,
1110    /// Count by category.
1111    pub by_category: HashMap<String, usize>,
1112    /// Count by specific type.
1113    pub by_type: HashMap<String, usize>,
1114    /// Count by severity.
1115    pub by_severity: HashMap<u8, usize>,
1116    /// Count by company.
1117    pub by_company: HashMap<String, usize>,
1118    /// Total monetary impact.
1119    pub total_monetary_impact: Decimal,
1120    /// Date range.
1121    pub date_range: Option<(NaiveDate, NaiveDate)>,
1122    /// Number of clusters.
1123    pub cluster_count: usize,
1124}
1125
1126impl AnomalySummary {
1127    /// Creates a summary from a list of anomalies.
1128    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1129        let mut summary = AnomalySummary {
1130            total_count: anomalies.len(),
1131            ..Default::default()
1132        };
1133
1134        let mut min_date: Option<NaiveDate> = None;
1135        let mut max_date: Option<NaiveDate> = None;
1136        let mut clusters = std::collections::HashSet::new();
1137
1138        for anomaly in anomalies {
1139            // By category
1140            *summary
1141                .by_category
1142                .entry(anomaly.anomaly_type.category().to_string())
1143                .or_insert(0) += 1;
1144
1145            // By type
1146            *summary
1147                .by_type
1148                .entry(anomaly.anomaly_type.type_name())
1149                .or_insert(0) += 1;
1150
1151            // By severity
1152            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1153
1154            // By company
1155            *summary
1156                .by_company
1157                .entry(anomaly.company_code.clone())
1158                .or_insert(0) += 1;
1159
1160            // Monetary impact
1161            if let Some(impact) = anomaly.monetary_impact {
1162                summary.total_monetary_impact += impact;
1163            }
1164
1165            // Date range
1166            match min_date {
1167                None => min_date = Some(anomaly.anomaly_date),
1168                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1169                _ => {}
1170            }
1171            match max_date {
1172                None => max_date = Some(anomaly.anomaly_date),
1173                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1174                _ => {}
1175            }
1176
1177            // Clusters
1178            if let Some(cluster_id) = &anomaly.cluster_id {
1179                clusters.insert(cluster_id.clone());
1180            }
1181        }
1182
1183        summary.date_range = min_date.zip(max_date);
1184        summary.cluster_count = clusters.len();
1185
1186        summary
1187    }
1188}
1189
1190// ============================================================================
1191// ENHANCED ANOMALY TAXONOMY (FR-003)
1192// ============================================================================
1193
1194/// High-level anomaly category for multi-class classification.
1195///
1196/// These categories provide a more granular classification than the base
1197/// AnomalyType enum, enabling better ML model training and audit reporting.
1198#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1199pub enum AnomalyCategory {
1200    // Vendor-related anomalies
1201    /// Fictitious or shell vendor.
1202    FictitiousVendor,
1203    /// Kickback or collusion with vendor.
1204    VendorKickback,
1205    /// Related party vendor transactions.
1206    RelatedPartyVendor,
1207
1208    // Transaction-related anomalies
1209    /// Duplicate payment or invoice.
1210    DuplicatePayment,
1211    /// Unauthorized transaction.
1212    UnauthorizedTransaction,
1213    /// Structured transactions to avoid thresholds.
1214    StructuredTransaction,
1215
1216    // Pattern-based anomalies
1217    /// Circular flow of funds.
1218    CircularFlow,
1219    /// Behavioral anomaly (deviation from normal patterns).
1220    BehavioralAnomaly,
1221    /// Timing-based anomaly.
1222    TimingAnomaly,
1223
1224    // Journal entry anomalies
1225    /// Manual journal entry anomaly.
1226    JournalAnomaly,
1227    /// Manual override of controls.
1228    ManualOverride,
1229    /// Missing approval in chain.
1230    MissingApproval,
1231
1232    // Statistical anomalies
1233    /// Statistical outlier.
1234    StatisticalOutlier,
1235    /// Distribution anomaly (Benford, etc.).
1236    DistributionAnomaly,
1237
1238    // Custom category
1239    /// User-defined category.
1240    Custom(String),
1241}
1242
1243impl AnomalyCategory {
1244    /// Derives an AnomalyCategory from an AnomalyType.
1245    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1246        match anomaly_type {
1247            AnomalyType::Fraud(fraud_type) => match fraud_type {
1248                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1249                    AnomalyCategory::FictitiousVendor
1250                }
1251                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1252                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1253                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1254                    AnomalyCategory::StructuredTransaction
1255                }
1256                FraudType::SelfApproval
1257                | FraudType::UnauthorizedApproval
1258                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1259                FraudType::TimingAnomaly
1260                | FraudType::RoundDollarManipulation
1261                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1262                _ => AnomalyCategory::BehavioralAnomaly,
1263            },
1264            AnomalyType::Error(error_type) => match error_type {
1265                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1266                ErrorType::WrongPeriod
1267                | ErrorType::BackdatedEntry
1268                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1269                _ => AnomalyCategory::JournalAnomaly,
1270            },
1271            AnomalyType::ProcessIssue(process_type) => match process_type {
1272                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1273                    AnomalyCategory::MissingApproval
1274                }
1275                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1276                    AnomalyCategory::ManualOverride
1277                }
1278                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1279                    AnomalyCategory::TimingAnomaly
1280                }
1281                _ => AnomalyCategory::BehavioralAnomaly,
1282            },
1283            AnomalyType::Statistical(stat_type) => match stat_type {
1284                StatisticalAnomalyType::BenfordViolation
1285                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1286                _ => AnomalyCategory::StatisticalOutlier,
1287            },
1288            AnomalyType::Relational(rel_type) => match rel_type {
1289                RelationalAnomalyType::CircularTransaction
1290                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1291                _ => AnomalyCategory::BehavioralAnomaly,
1292            },
1293            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1294        }
1295    }
1296
1297    /// Returns the category name as a string.
1298    pub fn name(&self) -> &str {
1299        match self {
1300            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1301            AnomalyCategory::VendorKickback => "vendor_kickback",
1302            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1303            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1304            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1305            AnomalyCategory::StructuredTransaction => "structured_transaction",
1306            AnomalyCategory::CircularFlow => "circular_flow",
1307            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1308            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1309            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1310            AnomalyCategory::ManualOverride => "manual_override",
1311            AnomalyCategory::MissingApproval => "missing_approval",
1312            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1313            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1314            AnomalyCategory::Custom(s) => s.as_str(),
1315        }
1316    }
1317
1318    /// Returns the ordinal value for ML encoding.
1319    pub fn ordinal(&self) -> u8 {
1320        match self {
1321            AnomalyCategory::FictitiousVendor => 0,
1322            AnomalyCategory::VendorKickback => 1,
1323            AnomalyCategory::RelatedPartyVendor => 2,
1324            AnomalyCategory::DuplicatePayment => 3,
1325            AnomalyCategory::UnauthorizedTransaction => 4,
1326            AnomalyCategory::StructuredTransaction => 5,
1327            AnomalyCategory::CircularFlow => 6,
1328            AnomalyCategory::BehavioralAnomaly => 7,
1329            AnomalyCategory::TimingAnomaly => 8,
1330            AnomalyCategory::JournalAnomaly => 9,
1331            AnomalyCategory::ManualOverride => 10,
1332            AnomalyCategory::MissingApproval => 11,
1333            AnomalyCategory::StatisticalOutlier => 12,
1334            AnomalyCategory::DistributionAnomaly => 13,
1335            AnomalyCategory::Custom(_) => 14,
1336        }
1337    }
1338
1339    /// Returns the total number of categories (excluding Custom).
1340    pub fn category_count() -> usize {
1341        15 // 14 fixed categories + Custom
1342    }
1343}
1344
1345/// Type of contributing factor for anomaly confidence/severity calculation.
1346#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1347pub enum FactorType {
1348    /// Amount deviation from expected value.
1349    AmountDeviation,
1350    /// Proximity to approval/reporting threshold.
1351    ThresholdProximity,
1352    /// Timing-related anomaly indicator.
1353    TimingAnomaly,
1354    /// Entity risk score contribution.
1355    EntityRisk,
1356    /// Pattern match confidence.
1357    PatternMatch,
1358    /// Frequency deviation from normal.
1359    FrequencyDeviation,
1360    /// Relationship-based anomaly indicator.
1361    RelationshipAnomaly,
1362    /// Control bypass indicator.
1363    ControlBypass,
1364    /// Benford's Law violation.
1365    BenfordViolation,
1366    /// Duplicate indicator.
1367    DuplicateIndicator,
1368    /// Approval chain issue.
1369    ApprovalChainIssue,
1370    /// Documentation gap.
1371    DocumentationGap,
1372    /// Custom factor type.
1373    Custom,
1374}
1375
1376impl FactorType {
1377    /// Returns the factor type name.
1378    pub fn name(&self) -> &'static str {
1379        match self {
1380            FactorType::AmountDeviation => "amount_deviation",
1381            FactorType::ThresholdProximity => "threshold_proximity",
1382            FactorType::TimingAnomaly => "timing_anomaly",
1383            FactorType::EntityRisk => "entity_risk",
1384            FactorType::PatternMatch => "pattern_match",
1385            FactorType::FrequencyDeviation => "frequency_deviation",
1386            FactorType::RelationshipAnomaly => "relationship_anomaly",
1387            FactorType::ControlBypass => "control_bypass",
1388            FactorType::BenfordViolation => "benford_violation",
1389            FactorType::DuplicateIndicator => "duplicate_indicator",
1390            FactorType::ApprovalChainIssue => "approval_chain_issue",
1391            FactorType::DocumentationGap => "documentation_gap",
1392            FactorType::Custom => "custom",
1393        }
1394    }
1395}
1396
1397/// Evidence supporting a contributing factor.
1398#[derive(Debug, Clone, Serialize, Deserialize)]
1399pub struct FactorEvidence {
1400    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
1401    pub source: String,
1402    /// Raw evidence data.
1403    pub data: HashMap<String, String>,
1404}
1405
1406/// A contributing factor to anomaly confidence/severity.
1407#[derive(Debug, Clone, Serialize, Deserialize)]
1408pub struct ContributingFactor {
1409    /// Type of factor.
1410    pub factor_type: FactorType,
1411    /// Observed value.
1412    pub value: f64,
1413    /// Threshold or expected value.
1414    pub threshold: f64,
1415    /// Direction of comparison (true = value > threshold is anomalous).
1416    pub direction_greater: bool,
1417    /// Weight of this factor in overall calculation (0.0 - 1.0).
1418    pub weight: f64,
1419    /// Human-readable description.
1420    pub description: String,
1421    /// Optional supporting evidence.
1422    pub evidence: Option<FactorEvidence>,
1423}
1424
1425impl ContributingFactor {
1426    /// Creates a new contributing factor.
1427    pub fn new(
1428        factor_type: FactorType,
1429        value: f64,
1430        threshold: f64,
1431        direction_greater: bool,
1432        weight: f64,
1433        description: &str,
1434    ) -> Self {
1435        Self {
1436            factor_type,
1437            value,
1438            threshold,
1439            direction_greater,
1440            weight,
1441            description: description.to_string(),
1442            evidence: None,
1443        }
1444    }
1445
1446    /// Adds evidence to the factor.
1447    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1448        self.evidence = Some(FactorEvidence {
1449            source: source.to_string(),
1450            data,
1451        });
1452        self
1453    }
1454
1455    /// Calculates the factor's contribution to anomaly score.
1456    pub fn contribution(&self) -> f64 {
1457        let deviation = if self.direction_greater {
1458            (self.value - self.threshold).max(0.0)
1459        } else {
1460            (self.threshold - self.value).max(0.0)
1461        };
1462
1463        // Normalize by threshold to get relative deviation
1464        let relative_deviation = if self.threshold.abs() > 0.001 {
1465            deviation / self.threshold.abs()
1466        } else {
1467            deviation
1468        };
1469
1470        // Apply weight and cap at 1.0
1471        (relative_deviation * self.weight).min(1.0)
1472    }
1473}
1474
1475/// Enhanced anomaly label with dynamic confidence and severity.
1476#[derive(Debug, Clone, Serialize, Deserialize)]
1477pub struct EnhancedAnomalyLabel {
1478    /// Base labeled anomaly (backward compatible).
1479    pub base: LabeledAnomaly,
1480    /// Enhanced category classification.
1481    pub category: AnomalyCategory,
1482    /// Dynamically calculated confidence (0.0 - 1.0).
1483    pub enhanced_confidence: f64,
1484    /// Contextually calculated severity (0.0 - 1.0).
1485    pub enhanced_severity: f64,
1486    /// Factors contributing to confidence/severity.
1487    pub contributing_factors: Vec<ContributingFactor>,
1488    /// Secondary categories (for multi-label classification).
1489    pub secondary_categories: Vec<AnomalyCategory>,
1490}
1491
1492impl EnhancedAnomalyLabel {
1493    /// Creates an enhanced label from a base labeled anomaly.
1494    pub fn from_base(base: LabeledAnomaly) -> Self {
1495        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1496        let enhanced_confidence = base.confidence;
1497        let enhanced_severity = base.severity as f64 / 5.0;
1498
1499        Self {
1500            base,
1501            category,
1502            enhanced_confidence,
1503            enhanced_severity,
1504            contributing_factors: Vec::new(),
1505            secondary_categories: Vec::new(),
1506        }
1507    }
1508
1509    /// Sets the enhanced confidence.
1510    pub fn with_confidence(mut self, confidence: f64) -> Self {
1511        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1512        self
1513    }
1514
1515    /// Sets the enhanced severity.
1516    pub fn with_severity(mut self, severity: f64) -> Self {
1517        self.enhanced_severity = severity.clamp(0.0, 1.0);
1518        self
1519    }
1520
1521    /// Adds a contributing factor.
1522    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1523        self.contributing_factors.push(factor);
1524        self
1525    }
1526
1527    /// Adds a secondary category.
1528    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1529        if !self.secondary_categories.contains(&category) && category != self.category {
1530            self.secondary_categories.push(category);
1531        }
1532        self
1533    }
1534
1535    /// Converts to an extended feature vector.
1536    ///
1537    /// Returns base features (15) + enhanced features (10) = 25 features.
1538    pub fn to_features(&self) -> Vec<f64> {
1539        let mut features = self.base.to_features();
1540
1541        // Enhanced features
1542        features.push(self.enhanced_confidence);
1543        features.push(self.enhanced_severity);
1544        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1545        features.push(self.secondary_categories.len() as f64);
1546        features.push(self.contributing_factors.len() as f64);
1547
1548        // Max factor weight
1549        let max_weight = self
1550            .contributing_factors
1551            .iter()
1552            .map(|f| f.weight)
1553            .fold(0.0, f64::max);
1554        features.push(max_weight);
1555
1556        // Factor type indicators (binary flags for key factor types)
1557        let has_control_bypass = self
1558            .contributing_factors
1559            .iter()
1560            .any(|f| f.factor_type == FactorType::ControlBypass);
1561        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1562
1563        let has_amount_deviation = self
1564            .contributing_factors
1565            .iter()
1566            .any(|f| f.factor_type == FactorType::AmountDeviation);
1567        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1568
1569        let has_timing = self
1570            .contributing_factors
1571            .iter()
1572            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1573        features.push(if has_timing { 1.0 } else { 0.0 });
1574
1575        let has_pattern_match = self
1576            .contributing_factors
1577            .iter()
1578            .any(|f| f.factor_type == FactorType::PatternMatch);
1579        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1580
1581        features
1582    }
1583
1584    /// Returns the number of features in the enhanced feature vector.
1585    pub fn feature_count() -> usize {
1586        25 // 15 base + 10 enhanced
1587    }
1588
1589    /// Returns feature names for the enhanced feature vector.
1590    pub fn feature_names() -> Vec<&'static str> {
1591        let mut names = LabeledAnomaly::feature_names();
1592        names.extend(vec![
1593            "enhanced_confidence",
1594            "enhanced_severity",
1595            "category_ordinal",
1596            "secondary_category_count",
1597            "contributing_factor_count",
1598            "max_factor_weight",
1599            "has_control_bypass",
1600            "has_amount_deviation",
1601            "has_timing_factor",
1602            "has_pattern_match",
1603        ]);
1604        names
1605    }
1606}
1607
1608// ============================================================================
1609// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1610// ============================================================================
1611
1612/// Severity level classification for anomalies.
1613#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1614pub enum SeverityLevel {
1615    /// Minor issue, low impact.
1616    Low,
1617    /// Moderate issue, noticeable impact.
1618    #[default]
1619    Medium,
1620    /// Significant issue, substantial impact.
1621    High,
1622    /// Critical issue, severe impact requiring immediate attention.
1623    Critical,
1624}
1625
1626impl SeverityLevel {
1627    /// Returns the numeric value (1-4) for the severity level.
1628    pub fn numeric(&self) -> u8 {
1629        match self {
1630            SeverityLevel::Low => 1,
1631            SeverityLevel::Medium => 2,
1632            SeverityLevel::High => 3,
1633            SeverityLevel::Critical => 4,
1634        }
1635    }
1636
1637    /// Creates a severity level from a numeric value.
1638    pub fn from_numeric(value: u8) -> Self {
1639        match value {
1640            1 => SeverityLevel::Low,
1641            2 => SeverityLevel::Medium,
1642            3 => SeverityLevel::High,
1643            _ => SeverityLevel::Critical,
1644        }
1645    }
1646
1647    /// Creates a severity level from a normalized score (0.0-1.0).
1648    pub fn from_score(score: f64) -> Self {
1649        match score {
1650            s if s < 0.25 => SeverityLevel::Low,
1651            s if s < 0.50 => SeverityLevel::Medium,
1652            s if s < 0.75 => SeverityLevel::High,
1653            _ => SeverityLevel::Critical,
1654        }
1655    }
1656
1657    /// Returns a normalized score (0.0-1.0) for this severity level.
1658    pub fn to_score(&self) -> f64 {
1659        match self {
1660            SeverityLevel::Low => 0.125,
1661            SeverityLevel::Medium => 0.375,
1662            SeverityLevel::High => 0.625,
1663            SeverityLevel::Critical => 0.875,
1664        }
1665    }
1666}
1667
1668/// Structured severity scoring for anomalies.
1669#[derive(Debug, Clone, Serialize, Deserialize)]
1670pub struct AnomalySeverity {
1671    /// Severity level classification.
1672    pub level: SeverityLevel,
1673    /// Continuous severity score (0.0-1.0).
1674    pub score: f64,
1675    /// Absolute financial impact amount.
1676    pub financial_impact: Decimal,
1677    /// Whether this exceeds materiality threshold.
1678    pub is_material: bool,
1679    /// Materiality threshold used for determination.
1680    #[serde(default, skip_serializing_if = "Option::is_none")]
1681    pub materiality_threshold: Option<Decimal>,
1682}
1683
1684impl AnomalySeverity {
1685    /// Creates a new severity assessment.
1686    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1687        Self {
1688            level,
1689            score: level.to_score(),
1690            financial_impact,
1691            is_material: false,
1692            materiality_threshold: None,
1693        }
1694    }
1695
1696    /// Creates severity from a score, auto-determining level.
1697    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1698        Self {
1699            level: SeverityLevel::from_score(score),
1700            score: score.clamp(0.0, 1.0),
1701            financial_impact,
1702            is_material: false,
1703            materiality_threshold: None,
1704        }
1705    }
1706
1707    /// Sets the materiality assessment.
1708    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1709        self.materiality_threshold = Some(threshold);
1710        self.is_material = self.financial_impact.abs() >= threshold;
1711        self
1712    }
1713}
1714
1715impl Default for AnomalySeverity {
1716    fn default() -> Self {
1717        Self {
1718            level: SeverityLevel::Medium,
1719            score: 0.5,
1720            financial_impact: Decimal::ZERO,
1721            is_material: false,
1722            materiality_threshold: None,
1723        }
1724    }
1725}
1726
/// Detection difficulty classification for anomalies.
///
/// Categorizes how difficult an anomaly is to detect, which is useful
/// for ML model benchmarking and audit procedure selection.
///
/// Variants are declared from easiest to hardest; `Moderate` is the default.
///
/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
/// is used for drift event classification and has different variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AnomalyDetectionDifficulty {
    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
    Trivial,
    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
    Easy,
    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
    #[default]
    Moderate,
    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
    Hard,
    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
    Expert,
}
1748
1749impl AnomalyDetectionDifficulty {
1750    /// Returns the expected detection rate for this difficulty level.
1751    pub fn expected_detection_rate(&self) -> f64 {
1752        match self {
1753            AnomalyDetectionDifficulty::Trivial => 0.99,
1754            AnomalyDetectionDifficulty::Easy => 0.90,
1755            AnomalyDetectionDifficulty::Moderate => 0.70,
1756            AnomalyDetectionDifficulty::Hard => 0.40,
1757            AnomalyDetectionDifficulty::Expert => 0.15,
1758        }
1759    }
1760
1761    /// Returns a numeric difficulty score (0.0-1.0).
1762    pub fn difficulty_score(&self) -> f64 {
1763        match self {
1764            AnomalyDetectionDifficulty::Trivial => 0.05,
1765            AnomalyDetectionDifficulty::Easy => 0.25,
1766            AnomalyDetectionDifficulty::Moderate => 0.50,
1767            AnomalyDetectionDifficulty::Hard => 0.75,
1768            AnomalyDetectionDifficulty::Expert => 0.95,
1769        }
1770    }
1771
1772    /// Creates a difficulty level from a score (0.0-1.0).
1773    pub fn from_score(score: f64) -> Self {
1774        match score {
1775            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1776            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1777            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1778            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1779            _ => AnomalyDetectionDifficulty::Expert,
1780        }
1781    }
1782
1783    /// Returns the name of this difficulty level.
1784    pub fn name(&self) -> &'static str {
1785        match self {
1786            AnomalyDetectionDifficulty::Trivial => "trivial",
1787            AnomalyDetectionDifficulty::Easy => "easy",
1788            AnomalyDetectionDifficulty::Moderate => "moderate",
1789            AnomalyDetectionDifficulty::Hard => "hard",
1790            AnomalyDetectionDifficulty::Expert => "expert",
1791        }
1792    }
1793}
1794
/// Ground truth certainty level for anomaly labels.
///
/// Indicates how certain we are that the label is correct. Variants are
/// declared from most to least certain; `Definite` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum GroundTruthCertainty {
    /// Definitively known (injected anomaly with full provenance).
    #[default]
    Definite,
    /// Highly probable based on strong evidence.
    Probable,
    /// Possibly an anomaly based on indirect evidence.
    Possible,
}
1808
1809impl GroundTruthCertainty {
1810    /// Returns a certainty score (0.0-1.0).
1811    pub fn certainty_score(&self) -> f64 {
1812        match self {
1813            GroundTruthCertainty::Definite => 1.0,
1814            GroundTruthCertainty::Probable => 0.8,
1815            GroundTruthCertainty::Possible => 0.5,
1816        }
1817    }
1818
1819    /// Returns the name of this certainty level.
1820    pub fn name(&self) -> &'static str {
1821        match self {
1822            GroundTruthCertainty::Definite => "definite",
1823            GroundTruthCertainty::Probable => "probable",
1824            GroundTruthCertainty::Possible => "possible",
1825        }
1826    }
1827}
1828
/// Detection method classification.
///
/// Indicates which detection methods are recommended or effective for an anomaly.
/// Each variant has a snake_case name and a short description available through
/// the inherent `name` and `description` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DetectionMethod {
    /// Simple rule-based detection (thresholds, filters).
    RuleBased,
    /// Statistical analysis (distributions, outlier detection).
    Statistical,
    /// Machine learning models (classification, anomaly detection).
    MachineLearning,
    /// Graph-based analysis (network patterns, relationships).
    GraphBased,
    /// Manual forensic audit procedures.
    ForensicAudit,
    /// Combination of multiple methods.
    Hybrid,
}
1847
1848impl DetectionMethod {
1849    /// Returns the name of this detection method.
1850    pub fn name(&self) -> &'static str {
1851        match self {
1852            DetectionMethod::RuleBased => "rule_based",
1853            DetectionMethod::Statistical => "statistical",
1854            DetectionMethod::MachineLearning => "machine_learning",
1855            DetectionMethod::GraphBased => "graph_based",
1856            DetectionMethod::ForensicAudit => "forensic_audit",
1857            DetectionMethod::Hybrid => "hybrid",
1858        }
1859    }
1860
1861    /// Returns a description of this detection method.
1862    pub fn description(&self) -> &'static str {
1863        match self {
1864            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1865            DetectionMethod::Statistical => "Statistical distribution analysis",
1866            DetectionMethod::MachineLearning => "ML classification models",
1867            DetectionMethod::GraphBased => "Network and relationship analysis",
1868            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1869            DetectionMethod::Hybrid => "Combined multi-method approach",
1870        }
1871    }
1872}
1873
/// Extended anomaly label with comprehensive multi-dimensional classification.
///
/// Wraps a base [`LabeledAnomaly`] and adds fields for severity scoring,
/// detection difficulty, recommended methods, and ground truth.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtendedAnomalyLabel {
    /// Base labeled anomaly.
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Structured severity assessment.
    pub severity: AnomalySeverity,
    /// Detection difficulty classification.
    pub detection_difficulty: AnomalyDetectionDifficulty,
    /// Recommended detection methods for this anomaly.
    pub recommended_methods: Vec<DetectionMethod>,
    /// Key indicators that should trigger detection.
    pub key_indicators: Vec<String>,
    /// Ground truth certainty level.
    pub ground_truth_certainty: GroundTruthCertainty,
    /// Contributing factors to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Related entity IDs (vendors, customers, employees, etc.).
    pub related_entity_ids: Vec<String>,
    /// Secondary categories for multi-label classification.
    pub secondary_categories: Vec<AnomalyCategory>,
    /// Scheme ID if part of a multi-stage fraud scheme.
    ///
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_id: Option<String>,
    /// Stage number within a scheme (1-indexed).
    ///
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_stage: Option<u32>,
    /// Whether this is a near-miss (suspicious but legitimate).
    ///
    /// Deserializes to `false` when the field is absent.
    #[serde(default)]
    pub is_near_miss: bool,
    /// Explanation if this is a near-miss.
    ///
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub near_miss_explanation: Option<String>,
}
1913
1914impl ExtendedAnomalyLabel {
1915    /// Creates an extended label from a base labeled anomaly.
1916    pub fn from_base(base: LabeledAnomaly) -> Self {
1917        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1918        let severity = AnomalySeverity {
1919            level: SeverityLevel::from_numeric(base.severity),
1920            score: base.severity as f64 / 5.0,
1921            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1922            is_material: false,
1923            materiality_threshold: None,
1924        };
1925
1926        Self {
1927            base,
1928            category,
1929            severity,
1930            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1931            recommended_methods: vec![DetectionMethod::RuleBased],
1932            key_indicators: Vec::new(),
1933            ground_truth_certainty: GroundTruthCertainty::Definite,
1934            contributing_factors: Vec::new(),
1935            related_entity_ids: Vec::new(),
1936            secondary_categories: Vec::new(),
1937            scheme_id: None,
1938            scheme_stage: None,
1939            is_near_miss: false,
1940            near_miss_explanation: None,
1941        }
1942    }
1943
1944    /// Sets the severity assessment.
1945    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1946        self.severity = severity;
1947        self
1948    }
1949
1950    /// Sets the detection difficulty.
1951    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1952        self.detection_difficulty = difficulty;
1953        self
1954    }
1955
1956    /// Adds a recommended detection method.
1957    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1958        if !self.recommended_methods.contains(&method) {
1959            self.recommended_methods.push(method);
1960        }
1961        self
1962    }
1963
1964    /// Sets the recommended detection methods.
1965    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1966        self.recommended_methods = methods;
1967        self
1968    }
1969
1970    /// Adds a key indicator.
1971    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1972        self.key_indicators.push(indicator.into());
1973        self
1974    }
1975
1976    /// Sets the ground truth certainty.
1977    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1978        self.ground_truth_certainty = certainty;
1979        self
1980    }
1981
1982    /// Adds a contributing factor.
1983    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1984        self.contributing_factors.push(factor);
1985        self
1986    }
1987
1988    /// Adds a related entity ID.
1989    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
1990        self.related_entity_ids.push(entity_id.into());
1991        self
1992    }
1993
1994    /// Adds a secondary category.
1995    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1996        if category != self.category && !self.secondary_categories.contains(&category) {
1997            self.secondary_categories.push(category);
1998        }
1999        self
2000    }
2001
2002    /// Sets scheme information.
2003    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2004        self.scheme_id = Some(scheme_id.into());
2005        self.scheme_stage = Some(stage);
2006        self
2007    }
2008
2009    /// Marks this as a near-miss with explanation.
2010    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2011        self.is_near_miss = true;
2012        self.near_miss_explanation = Some(explanation.into());
2013        self
2014    }
2015
2016    /// Converts to an extended feature vector for ML.
2017    ///
2018    /// Returns base features (15) + extended features (15) = 30 features.
2019    pub fn to_features(&self) -> Vec<f64> {
2020        let mut features = self.base.to_features();
2021
2022        // Extended features
2023        features.push(self.severity.score);
2024        features.push(self.severity.level.to_score());
2025        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2026        features.push(self.detection_difficulty.difficulty_score());
2027        features.push(self.detection_difficulty.expected_detection_rate());
2028        features.push(self.ground_truth_certainty.certainty_score());
2029        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2030        features.push(self.secondary_categories.len() as f64);
2031        features.push(self.contributing_factors.len() as f64);
2032        features.push(self.key_indicators.len() as f64);
2033        features.push(self.recommended_methods.len() as f64);
2034        features.push(self.related_entity_ids.len() as f64);
2035        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2036        features.push(self.scheme_stage.unwrap_or(0) as f64);
2037        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2038
2039        features
2040    }
2041
2042    /// Returns the number of features in the extended feature vector.
2043    pub fn feature_count() -> usize {
2044        30 // 15 base + 15 extended
2045    }
2046
2047    /// Returns feature names for the extended feature vector.
2048    pub fn feature_names() -> Vec<&'static str> {
2049        let mut names = LabeledAnomaly::feature_names();
2050        names.extend(vec![
2051            "severity_score",
2052            "severity_level_score",
2053            "is_material",
2054            "difficulty_score",
2055            "expected_detection_rate",
2056            "ground_truth_certainty",
2057            "category_ordinal",
2058            "secondary_category_count",
2059            "contributing_factor_count",
2060            "key_indicator_count",
2061            "recommended_method_count",
2062            "related_entity_count",
2063            "is_part_of_scheme",
2064            "scheme_stage",
2065            "is_near_miss",
2066        ]);
2067        names
2068    }
2069}
2070
2071// ============================================================================
2072// MULTI-STAGE FRAUD SCHEME TYPES
2073// ============================================================================
2074
/// Type of multi-stage fraud scheme.
///
/// Each variant has a snake_case name and a typical stage count available
/// through the inherent `name` and `typical_stages` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SchemeType {
    /// Gradual embezzlement over time.
    GradualEmbezzlement,
    /// Revenue manipulation across periods.
    RevenueManipulation,
    /// Vendor kickback scheme.
    VendorKickback,
    /// Round-tripping funds through multiple entities.
    RoundTripping,
    /// Ghost employee scheme.
    GhostEmployee,
    /// Expense reimbursement fraud.
    ExpenseReimbursement,
    /// Inventory theft scheme.
    InventoryTheft,
    /// Custom scheme type.
    Custom,
}
2095
2096impl SchemeType {
2097    /// Returns the name of this scheme type.
2098    pub fn name(&self) -> &'static str {
2099        match self {
2100            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2101            SchemeType::RevenueManipulation => "revenue_manipulation",
2102            SchemeType::VendorKickback => "vendor_kickback",
2103            SchemeType::RoundTripping => "round_tripping",
2104            SchemeType::GhostEmployee => "ghost_employee",
2105            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2106            SchemeType::InventoryTheft => "inventory_theft",
2107            SchemeType::Custom => "custom",
2108        }
2109    }
2110
2111    /// Returns the typical number of stages for this scheme type.
2112    pub fn typical_stages(&self) -> u32 {
2113        match self {
2114            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2115            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2116            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2117            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2118            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2119            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2120            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2121            SchemeType::Custom => 4,
2122        }
2123    }
2124}
2125
/// Status of detection for a fraud scheme.
///
/// `Undetected` is the default status.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SchemeDetectionStatus {
    /// Scheme is undetected.
    #[default]
    Undetected,
    /// Under investigation but not confirmed.
    UnderInvestigation,
    /// Partially detected (some transactions flagged).
    PartiallyDetected,
    /// Fully detected and confirmed.
    FullyDetected,
}
2139
/// Reference to a transaction within a scheme.
///
/// Identifies the transaction by document ID and records its date, amount,
/// scheme stage, and (optionally) the anomaly label attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemeTransactionRef {
    /// Document ID of the transaction.
    pub document_id: String,
    /// Transaction date.
    pub date: chrono::NaiveDate,
    /// Transaction amount.
    pub amount: Decimal,
    /// Stage this transaction belongs to.
    pub stage: u32,
    /// Anomaly ID if labeled.
    ///
    /// Omitted from serialized output while `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub anomaly_id: Option<String>,
}
2155
/// Concealment technique used in fraud.
///
/// Each technique carries a detection-difficulty bonus, available through the
/// inherent `difficulty_bonus` method.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ConcealmentTechnique {
    /// Document manipulation or forgery.
    DocumentManipulation,
    /// Circumventing approval processes.
    ApprovalCircumvention,
    /// Exploiting timing (period-end, holidays).
    TimingExploitation,
    /// Transaction splitting to avoid thresholds.
    TransactionSplitting,
    /// Account misclassification.
    AccountMisclassification,
    /// Collusion with other employees.
    Collusion,
    /// Data alteration or deletion.
    DataAlteration,
    /// Creating false documentation.
    FalseDocumentation,
}
2176
2177impl ConcealmentTechnique {
2178    /// Returns the difficulty bonus this technique adds.
2179    pub fn difficulty_bonus(&self) -> f64 {
2180        match self {
2181            ConcealmentTechnique::DocumentManipulation => 0.20,
2182            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2183            ConcealmentTechnique::TimingExploitation => 0.10,
2184            ConcealmentTechnique::TransactionSplitting => 0.15,
2185            ConcealmentTechnique::AccountMisclassification => 0.10,
2186            ConcealmentTechnique::Collusion => 0.25,
2187            ConcealmentTechnique::DataAlteration => 0.20,
2188            ConcealmentTechnique::FalseDocumentation => 0.15,
2189        }
2190    }
2191}
2192
2193// ============================================================================
2194// ACFE-ALIGNED FRAUD TAXONOMY
2195// ============================================================================
2196//
2197// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2198// Nations: Occupational Fraud Classification System. This taxonomy provides
2199// ACFE-aligned categories, schemes, and calibration data.
2200
/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
///
/// ACFE Report to the Nations statistics (typical):
/// - Asset Misappropriation: 86% of cases, $100k median loss
/// - Corruption: 33% of cases, $150k median loss
/// - Financial Statement Fraud: 10% of cases, $954k median loss
///
/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
///
/// `AssetMisappropriation` is the default category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AcfeFraudCategory {
    /// Theft of organizational assets (cash, inventory, equipment).
    /// Most common (86% of cases) but typically lowest median loss ($100k).
    #[default]
    AssetMisappropriation,
    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
    /// Medium frequency (33% of cases), medium median loss ($150k).
    Corruption,
    /// Intentional misstatement of financial statements.
    /// Least common (10% of cases) but highest median loss ($954k).
    FinancialStatementFraud,
}
2222
2223impl AcfeFraudCategory {
2224    /// Returns the name of this category.
2225    pub fn name(&self) -> &'static str {
2226        match self {
2227            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2228            AcfeFraudCategory::Corruption => "corruption",
2229            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2230        }
2231    }
2232
2233    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2234    pub fn typical_occurrence_rate(&self) -> f64 {
2235        match self {
2236            AcfeFraudCategory::AssetMisappropriation => 0.86,
2237            AcfeFraudCategory::Corruption => 0.33,
2238            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2239        }
2240    }
2241
2242    /// Returns the typical median loss amount (from ACFE reports).
2243    pub fn typical_median_loss(&self) -> Decimal {
2244        match self {
2245            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2246            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2247            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2248        }
2249    }
2250
2251    /// Returns the typical detection time in months (from ACFE reports).
2252    pub fn typical_detection_months(&self) -> u32 {
2253        match self {
2254            AcfeFraudCategory::AssetMisappropriation => 12,
2255            AcfeFraudCategory::Corruption => 18,
2256            AcfeFraudCategory::FinancialStatementFraud => 24,
2257        }
2258    }
2259}
2260
/// Cash-based fraud schemes under Asset Misappropriation.
///
/// Organized according to the ACFE Fraud Tree:
/// - Theft of Cash on Hand
/// - Theft of Cash Receipts
/// - Fraudulent Disbursements (billing, payroll, expense reimbursement,
///   check/payment tampering, register/POS)
///
/// The section comments below mirror the grouping returned by the inherent
/// `subcategory` method.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CashFraudScheme {
    // ========== Theft of Cash on Hand ==========
    /// Stealing cash from cash drawers or safes after it has been recorded.
    Larceny,
    /// Stealing cash before it is recorded in the books (intercepts receipts).
    Skimming,

    // ========== Theft of Cash Receipts ==========
    /// Skimming from sales transactions before recording.
    SalesSkimming,
    /// Intercepting customer payments on accounts receivable.
    ReceivablesSkimming,
    /// Creating false refunds to pocket the difference.
    RefundSchemes,

    // ========== Fraudulent Disbursements - Billing Schemes ==========
    /// Creating fictitious vendors to invoice and pay.
    ShellCompany,
    /// Manipulating payments to legitimate vendors for personal gain.
    NonAccompliceVendor,
    /// Using company funds for personal purchases.
    PersonalPurchases,

    // ========== Fraudulent Disbursements - Payroll Schemes ==========
    /// Creating fake employees to collect wages.
    GhostEmployee,
    /// Falsifying hours worked, sales commissions, or salary rates.
    FalsifiedWages,
    /// Manipulating commission calculations.
    CommissionSchemes,

    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
    /// Claiming non-business expenses as business expenses.
    MischaracterizedExpenses,
    /// Inflating legitimate expense amounts.
    OverstatedExpenses,
    /// Creating completely fictitious expenses.
    FictitiousExpenses,

    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
    /// Forging the signature of an authorized check signer.
    ForgedMaker,
    /// Intercepting and altering the endorsement on legitimate checks.
    ForgedEndorsement,
    /// Altering the payee on a legitimate check.
    AlteredPayee,
    /// Authorized signer writing checks for personal benefit.
    AuthorizedMaker,

    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
    /// Creating false voided transactions.
    FalseVoids,
    /// Processing fictitious refunds.
    FalseRefunds,
}
2323
2324impl CashFraudScheme {
2325    /// Returns the ACFE category this scheme belongs to.
2326    pub fn category(&self) -> AcfeFraudCategory {
2327        AcfeFraudCategory::AssetMisappropriation
2328    }
2329
2330    /// Returns the subcategory within the ACFE Fraud Tree.
2331    pub fn subcategory(&self) -> &'static str {
2332        match self {
2333            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2334            CashFraudScheme::SalesSkimming
2335            | CashFraudScheme::ReceivablesSkimming
2336            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2337            CashFraudScheme::ShellCompany
2338            | CashFraudScheme::NonAccompliceVendor
2339            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2340            CashFraudScheme::GhostEmployee
2341            | CashFraudScheme::FalsifiedWages
2342            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2343            CashFraudScheme::MischaracterizedExpenses
2344            | CashFraudScheme::OverstatedExpenses
2345            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2346            CashFraudScheme::ForgedMaker
2347            | CashFraudScheme::ForgedEndorsement
2348            | CashFraudScheme::AlteredPayee
2349            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2350            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2351        }
2352    }
2353
2354    /// Returns the typical severity (1-5) for this scheme.
2355    pub fn severity(&self) -> u8 {
2356        match self {
2357            // Lower severity - often small amounts, easier to detect
2358            CashFraudScheme::FalseVoids
2359            | CashFraudScheme::FalseRefunds
2360            | CashFraudScheme::MischaracterizedExpenses => 3,
2361            // Medium severity
2362            CashFraudScheme::OverstatedExpenses
2363            | CashFraudScheme::Skimming
2364            | CashFraudScheme::Larceny
2365            | CashFraudScheme::PersonalPurchases
2366            | CashFraudScheme::FalsifiedWages => 4,
2367            // Higher severity - larger amounts, harder to detect
2368            CashFraudScheme::ShellCompany
2369            | CashFraudScheme::GhostEmployee
2370            | CashFraudScheme::FictitiousExpenses
2371            | CashFraudScheme::ForgedMaker
2372            | CashFraudScheme::AuthorizedMaker => 5,
2373            _ => 4,
2374        }
2375    }
2376
2377    /// Returns the typical detection difficulty.
2378    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2379        match self {
2380            // Easy to detect with basic controls
2381            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2382                AnomalyDetectionDifficulty::Easy
2383            }
2384            // Moderate - requires reconciliation
2385            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2386                AnomalyDetectionDifficulty::Moderate
2387            }
2388            // Hard - requires sophisticated analysis
2389            CashFraudScheme::Skimming
2390            | CashFraudScheme::ShellCompany
2391            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2392            // Expert level
2393            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2394                AnomalyDetectionDifficulty::Expert
2395            }
2396            _ => AnomalyDetectionDifficulty::Moderate,
2397        }
2398    }
2399
2400    /// Returns all variants for iteration.
2401    pub fn all_variants() -> &'static [CashFraudScheme] {
2402        &[
2403            CashFraudScheme::Larceny,
2404            CashFraudScheme::Skimming,
2405            CashFraudScheme::SalesSkimming,
2406            CashFraudScheme::ReceivablesSkimming,
2407            CashFraudScheme::RefundSchemes,
2408            CashFraudScheme::ShellCompany,
2409            CashFraudScheme::NonAccompliceVendor,
2410            CashFraudScheme::PersonalPurchases,
2411            CashFraudScheme::GhostEmployee,
2412            CashFraudScheme::FalsifiedWages,
2413            CashFraudScheme::CommissionSchemes,
2414            CashFraudScheme::MischaracterizedExpenses,
2415            CashFraudScheme::OverstatedExpenses,
2416            CashFraudScheme::FictitiousExpenses,
2417            CashFraudScheme::ForgedMaker,
2418            CashFraudScheme::ForgedEndorsement,
2419            CashFraudScheme::AlteredPayee,
2420            CashFraudScheme::AuthorizedMaker,
2421            CashFraudScheme::FalseVoids,
2422            CashFraudScheme::FalseRefunds,
2423        ]
2424    }
2425}
2426
/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
///
/// The section comments below mirror the grouping returned by the inherent
/// `subcategory` method: inventory schemes versus other assets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AssetFraudScheme {
    // ========== Inventory Schemes ==========
    /// Misusing or converting inventory for personal benefit.
    InventoryMisuse,
    /// Stealing physical inventory items.
    InventoryTheft,
    /// Manipulating purchasing to facilitate theft.
    InventoryPurchasingScheme,
    /// Manipulating receiving/shipping to steal inventory.
    InventoryReceivingScheme,

    // ========== Other Asset Schemes ==========
    /// Misusing company equipment or vehicles.
    EquipmentMisuse,
    /// Theft of company equipment, tools, or supplies.
    EquipmentTheft,
    /// Unauthorized access to or theft of intellectual property.
    IntellectualPropertyTheft,
    /// Using company time/resources for personal business.
    TimeTheft,
}
2450
2451impl AssetFraudScheme {
2452    /// Returns the ACFE category this scheme belongs to.
2453    pub fn category(&self) -> AcfeFraudCategory {
2454        AcfeFraudCategory::AssetMisappropriation
2455    }
2456
2457    /// Returns the subcategory within the ACFE Fraud Tree.
2458    pub fn subcategory(&self) -> &'static str {
2459        match self {
2460            AssetFraudScheme::InventoryMisuse
2461            | AssetFraudScheme::InventoryTheft
2462            | AssetFraudScheme::InventoryPurchasingScheme
2463            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2464            _ => "other_assets",
2465        }
2466    }
2467
2468    /// Returns the typical severity (1-5) for this scheme.
2469    pub fn severity(&self) -> u8 {
2470        match self {
2471            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2472            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2473            AssetFraudScheme::InventoryTheft
2474            | AssetFraudScheme::InventoryPurchasingScheme
2475            | AssetFraudScheme::InventoryReceivingScheme => 4,
2476            AssetFraudScheme::IntellectualPropertyTheft => 5,
2477        }
2478    }
2479}
2480
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// Variants are grouped into four branches: conflicts of interest, bribery,
/// illegal gratuities, and economic extortion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2515
2516impl CorruptionScheme {
2517    /// Returns the ACFE category this scheme belongs to.
2518    pub fn category(&self) -> AcfeFraudCategory {
2519        AcfeFraudCategory::Corruption
2520    }
2521
2522    /// Returns the subcategory within the ACFE Fraud Tree.
2523    pub fn subcategory(&self) -> &'static str {
2524        match self {
2525            CorruptionScheme::PurchasingConflict
2526            | CorruptionScheme::SalesConflict
2527            | CorruptionScheme::OutsideBusinessInterest
2528            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2529            CorruptionScheme::InvoiceKickback
2530            | CorruptionScheme::BidRigging
2531            | CorruptionScheme::CashBribery
2532            | CorruptionScheme::PublicOfficial => "bribery",
2533            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2534            CorruptionScheme::EconomicExtortion => "economic_extortion",
2535        }
2536    }
2537
2538    /// Returns the typical severity (1-5) for this scheme.
2539    pub fn severity(&self) -> u8 {
2540        match self {
2541            // Lower severity conflicts of interest
2542            CorruptionScheme::NepotismConflict => 3,
2543            // Medium severity
2544            CorruptionScheme::PurchasingConflict
2545            | CorruptionScheme::SalesConflict
2546            | CorruptionScheme::OutsideBusinessInterest
2547            | CorruptionScheme::IllegalGratuity => 4,
2548            // High severity - active corruption
2549            CorruptionScheme::InvoiceKickback
2550            | CorruptionScheme::BidRigging
2551            | CorruptionScheme::CashBribery
2552            | CorruptionScheme::EconomicExtortion => 5,
2553            // Highest severity - involves public officials
2554            CorruptionScheme::PublicOfficial => 5,
2555        }
2556    }
2557
2558    /// Returns the typical detection difficulty.
2559    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2560        match self {
2561            // Easier to detect with proper disclosure requirements
2562            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2563                AnomalyDetectionDifficulty::Moderate
2564            }
2565            // Hard - requires transaction pattern analysis
2566            CorruptionScheme::PurchasingConflict
2567            | CorruptionScheme::SalesConflict
2568            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2569            // Expert level - deliberate concealment
2570            CorruptionScheme::InvoiceKickback
2571            | CorruptionScheme::CashBribery
2572            | CorruptionScheme::PublicOfficial
2573            | CorruptionScheme::IllegalGratuity
2574            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2575        }
2576    }
2577
2578    /// Returns all variants for iteration.
2579    pub fn all_variants() -> &'static [CorruptionScheme] {
2580        &[
2581            CorruptionScheme::PurchasingConflict,
2582            CorruptionScheme::SalesConflict,
2583            CorruptionScheme::OutsideBusinessInterest,
2584            CorruptionScheme::NepotismConflict,
2585            CorruptionScheme::InvoiceKickback,
2586            CorruptionScheme::BidRigging,
2587            CorruptionScheme::CashBribery,
2588            CorruptionScheme::PublicOfficial,
2589            CorruptionScheme::IllegalGratuity,
2590            CorruptionScheme::EconomicExtortion,
2591        ]
2592    }
2593}
2594
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// The variants are split into an overstatement group and an understatement
/// group, matching the two section banners below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2631
2632impl FinancialStatementScheme {
2633    /// Returns the ACFE category this scheme belongs to.
2634    pub fn category(&self) -> AcfeFraudCategory {
2635        AcfeFraudCategory::FinancialStatementFraud
2636    }
2637
2638    /// Returns the subcategory within the ACFE Fraud Tree.
2639    pub fn subcategory(&self) -> &'static str {
2640        match self {
2641            FinancialStatementScheme::UnderstatedRevenues
2642            | FinancialStatementScheme::OverstatedExpenses
2643            | FinancialStatementScheme::OverstatedLiabilities
2644            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2645            _ => "overstatement",
2646        }
2647    }
2648
2649    /// Returns the typical severity (1-5) for this scheme.
2650    pub fn severity(&self) -> u8 {
2651        // All financial statement fraud is high severity
2652        5
2653    }
2654
2655    /// Returns the typical detection difficulty.
2656    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2657        match self {
2658            // Easier to detect with good analytics
2659            FinancialStatementScheme::ChannelStuffing
2660            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2661            // Hard - requires deep analysis
2662            FinancialStatementScheme::PrematureRevenue
2663            | FinancialStatementScheme::ImproperCapitalization
2664            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2665            // Expert level
2666            FinancialStatementScheme::FictitiousRevenues
2667            | FinancialStatementScheme::ConcealedLiabilities
2668            | FinancialStatementScheme::ImproperAssetValuations
2669            | FinancialStatementScheme::ImproperDisclosures
2670            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2671            _ => AnomalyDetectionDifficulty::Hard,
2672        }
2673    }
2674
2675    /// Returns all variants for iteration.
2676    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2677        &[
2678            FinancialStatementScheme::PrematureRevenue,
2679            FinancialStatementScheme::DelayedExpenses,
2680            FinancialStatementScheme::FictitiousRevenues,
2681            FinancialStatementScheme::ConcealedLiabilities,
2682            FinancialStatementScheme::ImproperAssetValuations,
2683            FinancialStatementScheme::ImproperDisclosures,
2684            FinancialStatementScheme::ChannelStuffing,
2685            FinancialStatementScheme::BillAndHold,
2686            FinancialStatementScheme::ImproperCapitalization,
2687            FinancialStatementScheme::UnderstatedRevenues,
2688            FinancialStatementScheme::OverstatedExpenses,
2689            FinancialStatementScheme::OverstatedLiabilities,
2690            FinancialStatementScheme::ImproperAssetWritedowns,
2691        ]
2692    }
2693}
2694
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the branch-specific scheme enums so that a
/// scheme from any branch can be carried in a single value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2707
2708impl AcfeScheme {
2709    /// Returns the ACFE category this scheme belongs to.
2710    pub fn category(&self) -> AcfeFraudCategory {
2711        match self {
2712            AcfeScheme::Cash(s) => s.category(),
2713            AcfeScheme::Asset(s) => s.category(),
2714            AcfeScheme::Corruption(s) => s.category(),
2715            AcfeScheme::FinancialStatement(s) => s.category(),
2716        }
2717    }
2718
2719    /// Returns the severity (1-5) for this scheme.
2720    pub fn severity(&self) -> u8 {
2721        match self {
2722            AcfeScheme::Cash(s) => s.severity(),
2723            AcfeScheme::Asset(s) => s.severity(),
2724            AcfeScheme::Corruption(s) => s.severity(),
2725            AcfeScheme::FinancialStatement(s) => s.severity(),
2726        }
2727    }
2728
2729    /// Returns the detection difficulty for this scheme.
2730    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2731        match self {
2732            AcfeScheme::Cash(s) => s.detection_difficulty(),
2733            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2734            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2735            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2736        }
2737    }
2738}
2739
/// How a fraud was detected (from ACFE statistics).
///
/// Covers the initial detection channel; `Other` collects anything not
/// listed explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2768
2769impl AcfeDetectionMethod {
2770    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2771    pub fn typical_detection_rate(&self) -> f64 {
2772        match self {
2773            AcfeDetectionMethod::Tip => 0.42,
2774            AcfeDetectionMethod::InternalAudit => 0.16,
2775            AcfeDetectionMethod::ManagementReview => 0.12,
2776            AcfeDetectionMethod::ExternalAudit => 0.04,
2777            AcfeDetectionMethod::AccountReconciliation => 0.05,
2778            AcfeDetectionMethod::DocumentExamination => 0.04,
2779            AcfeDetectionMethod::ByAccident => 0.06,
2780            AcfeDetectionMethod::ItControls => 0.03,
2781            AcfeDetectionMethod::Surveillance => 0.02,
2782            AcfeDetectionMethod::Confession => 0.02,
2783            AcfeDetectionMethod::LawEnforcement => 0.01,
2784            AcfeDetectionMethod::Other => 0.03,
2785        }
2786    }
2787
2788    /// Returns all variants for iteration.
2789    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2790        &[
2791            AcfeDetectionMethod::Tip,
2792            AcfeDetectionMethod::InternalAudit,
2793            AcfeDetectionMethod::ManagementReview,
2794            AcfeDetectionMethod::ExternalAudit,
2795            AcfeDetectionMethod::AccountReconciliation,
2796            AcfeDetectionMethod::DocumentExamination,
2797            AcfeDetectionMethod::ByAccident,
2798            AcfeDetectionMethod::ItControls,
2799            AcfeDetectionMethod::Surveillance,
2800            AcfeDetectionMethod::Confession,
2801            AcfeDetectionMethod::LawEnforcement,
2802            AcfeDetectionMethod::Other,
2803        ]
2804    }
2805}
2806
/// Department/position of perpetrator (from ACFE statistics).
///
/// `Other` collects any department not listed explicitly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
2835
2836impl PerpetratorDepartment {
2837    /// Returns the typical percentage of frauds by department (from ACFE reports).
2838    pub fn typical_occurrence_rate(&self) -> f64 {
2839        match self {
2840            PerpetratorDepartment::Accounting => 0.21,
2841            PerpetratorDepartment::Operations => 0.17,
2842            PerpetratorDepartment::Executive => 0.12,
2843            PerpetratorDepartment::Sales => 0.11,
2844            PerpetratorDepartment::CustomerService => 0.07,
2845            PerpetratorDepartment::Purchasing => 0.06,
2846            PerpetratorDepartment::It => 0.05,
2847            PerpetratorDepartment::HumanResources => 0.04,
2848            PerpetratorDepartment::Administrative => 0.04,
2849            PerpetratorDepartment::Warehouse => 0.03,
2850            PerpetratorDepartment::BoardOfDirectors => 0.02,
2851            PerpetratorDepartment::Other => 0.08,
2852        }
2853    }
2854
2855    /// Returns the typical median loss by perpetrator department.
2856    pub fn typical_median_loss(&self) -> Decimal {
2857        match self {
2858            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2859            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2860            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2861            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2862            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2863            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2864            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2865            _ => Decimal::new(80_000, 0),
2866        }
2867    }
2868}
2869
/// Perpetrator position level (from ACFE statistics).
///
/// Three tiers, listed from lowest to highest authority.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
2880
2881impl PerpetratorLevel {
2882    /// Returns the typical percentage of frauds by position level.
2883    pub fn typical_occurrence_rate(&self) -> f64 {
2884        match self {
2885            PerpetratorLevel::Employee => 0.42,
2886            PerpetratorLevel::Manager => 0.36,
2887            PerpetratorLevel::OwnerExecutive => 0.22,
2888        }
2889    }
2890
2891    /// Returns the typical median loss by position level.
2892    pub fn typical_median_loss(&self) -> Decimal {
2893        match self {
2894            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2895            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2896            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2897        }
2898    }
2899}
2900
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// The `Default` implementation supplies the reference figures. The
/// distribution maps are keyed by lowercase string labels rather than by
/// enum values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    ///
    /// The default shares (0.86, 0.33, 0.10) add up to more than 1.0, so they
    /// are not mutually exclusive probabilities; presumably one case can fall
    /// into several categories (verify against the ACFE report).
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    ///
    /// The default keys are the lowercased `Debug` names of
    /// `AcfeDetectionMethod` variants (e.g. `"internalaudit"`), without
    /// underscores.
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    ///
    /// The default lists six departments by snake_case name and folds all
    /// remaining departments into `"other"`.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Percentage of frauds involving collusion.
    ///
    /// Stored as a fraction; `validate` requires it to lie in 0.0-1.0.
    pub collusion_rate: f64,
}
2924
2925impl Default for AcfeCalibration {
2926    fn default() -> Self {
2927        let mut category_distribution = HashMap::new();
2928        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2929        category_distribution.insert("corruption".to_string(), 0.33);
2930        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2931
2932        let mut detection_method_distribution = HashMap::new();
2933        for method in AcfeDetectionMethod::all_variants() {
2934            detection_method_distribution.insert(
2935                format!("{method:?}").to_lowercase(),
2936                method.typical_detection_rate(),
2937            );
2938        }
2939
2940        let mut department_distribution = HashMap::new();
2941        department_distribution.insert("accounting".to_string(), 0.21);
2942        department_distribution.insert("operations".to_string(), 0.17);
2943        department_distribution.insert("executive".to_string(), 0.12);
2944        department_distribution.insert("sales".to_string(), 0.11);
2945        department_distribution.insert("customer_service".to_string(), 0.07);
2946        department_distribution.insert("purchasing".to_string(), 0.06);
2947        department_distribution.insert("other".to_string(), 0.26);
2948
2949        let mut level_distribution = HashMap::new();
2950        level_distribution.insert("employee".to_string(), 0.42);
2951        level_distribution.insert("manager".to_string(), 0.36);
2952        level_distribution.insert("owner_executive".to_string(), 0.22);
2953
2954        Self {
2955            median_loss: Decimal::new(117_000, 0),
2956            median_duration_months: 12,
2957            category_distribution,
2958            detection_method_distribution,
2959            department_distribution,
2960            level_distribution,
2961            avg_red_flags_per_case: 2.8,
2962            collusion_rate: 0.50,
2963        }
2964    }
2965}
2966
2967impl AcfeCalibration {
2968    /// Creates a new ACFE calibration with the given parameters.
2969    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2970        Self {
2971            median_loss,
2972            median_duration_months,
2973            ..Self::default()
2974        }
2975    }
2976
2977    /// Returns the median loss for a specific category.
2978    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2979        category.typical_median_loss()
2980    }
2981
2982    /// Returns the median duration for a specific category.
2983    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
2984        category.typical_detection_months()
2985    }
2986
2987    /// Validates the calibration data.
2988    pub fn validate(&self) -> Result<(), String> {
2989        if self.median_loss <= Decimal::ZERO {
2990            return Err("Median loss must be positive".to_string());
2991        }
2992        if self.median_duration_months == 0 {
2993            return Err("Median duration must be at least 1 month".to_string());
2994        }
2995        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
2996            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
2997        }
2998        Ok(())
2999    }
3000}
3001
/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
///
/// The fraud triangle is a model for explaining the factors that cause
/// someone to commit occupational fraud.
///
/// A triangle holds exactly one pressure and one rationalization, but any
/// number of opportunity factors (including none).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudTriangle {
    /// Pressure or incentive to commit fraud.
    pub pressure: PressureType,
    /// Opportunity factors that enable fraud.
    pub opportunities: Vec<OpportunityFactor>,
    /// Rationalization used to justify the fraud.
    pub rationalization: Rationalization,
}
3015
3016impl FraudTriangle {
3017    /// Creates a new fraud triangle.
3018    pub fn new(
3019        pressure: PressureType,
3020        opportunities: Vec<OpportunityFactor>,
3021        rationalization: Rationalization,
3022    ) -> Self {
3023        Self {
3024            pressure,
3025            opportunities,
3026            rationalization,
3027        }
3028    }
3029
3030    /// Returns a risk score based on the fraud triangle components.
3031    pub fn risk_score(&self) -> f64 {
3032        let pressure_score = self.pressure.risk_weight();
3033        let opportunity_score: f64 = self
3034            .opportunities
3035            .iter()
3036            .map(OpportunityFactor::risk_weight)
3037            .sum::<f64>()
3038            / self.opportunities.len().max(1) as f64;
3039        let rationalization_score = self.rationalization.risk_weight();
3040
3041        (pressure_score + opportunity_score + rationalization_score) / 3.0
3042    }
3043}
3044
/// Types of pressure/incentive that can lead to fraud.
///
/// This is the "pressure" corner of [`FraudTriangle`]. Variants are grouped
/// into financial and non-financial pressures.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PressureType {
    // Financial Pressures
    /// Personal financial difficulties (debt, lifestyle beyond means).
    PersonalFinancialDifficulties,
    /// Pressure to meet financial targets/earnings expectations.
    FinancialTargets,
    /// Market or analyst expectations.
    MarketExpectations,
    /// Debt covenant compliance requirements.
    CovenantCompliance,
    /// Credit rating maintenance.
    CreditRatingMaintenance,
    /// Acquisition/merger valuation pressure.
    AcquisitionValuation,

    // Non-Financial Pressures
    /// Fear of job loss.
    JobSecurity,
    /// Pressure to maintain status or image.
    StatusMaintenance,
    /// Gambling addiction.
    GamblingAddiction,
    /// Substance abuse issues.
    SubstanceAbuse,
    /// Family pressure or obligations.
    FamilyPressure,
    /// Greed or desire for more.
    Greed,
}
3076
3077impl PressureType {
3078    /// Returns the risk weight (0.0-1.0) for this pressure type.
3079    pub fn risk_weight(&self) -> f64 {
3080        match self {
3081            PressureType::PersonalFinancialDifficulties => 0.80,
3082            PressureType::FinancialTargets => 0.75,
3083            PressureType::MarketExpectations => 0.70,
3084            PressureType::CovenantCompliance => 0.85,
3085            PressureType::CreditRatingMaintenance => 0.70,
3086            PressureType::AcquisitionValuation => 0.75,
3087            PressureType::JobSecurity => 0.65,
3088            PressureType::StatusMaintenance => 0.55,
3089            PressureType::GamblingAddiction => 0.90,
3090            PressureType::SubstanceAbuse => 0.85,
3091            PressureType::FamilyPressure => 0.60,
3092            PressureType::Greed => 0.70,
3093        }
3094    }
3095}
3096
/// Opportunity factors that enable fraud.
///
/// This is the "opportunity" corner of [`FraudTriangle`], which can hold
/// several factors at once.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OpportunityFactor {
    /// Weak internal controls.
    WeakInternalControls,
    /// Lack of segregation of duties.
    LackOfSegregation,
    /// Override capability.
    ManagementOverride,
    /// Complex or unusual transactions.
    ComplexTransactions,
    /// Related party transactions.
    RelatedPartyTransactions,
    /// Poor tone at the top.
    PoorToneAtTop,
    /// Inadequate supervision.
    InadequateSupervision,
    /// Access to assets without accountability.
    AssetAccess,
    /// Inadequate record keeping.
    PoorRecordKeeping,
    /// Failure to discipline fraud perpetrators.
    LackOfDiscipline,
    /// Lack of independent checks.
    LackOfIndependentChecks,
}
3123
3124impl OpportunityFactor {
3125    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3126    pub fn risk_weight(&self) -> f64 {
3127        match self {
3128            OpportunityFactor::WeakInternalControls => 0.85,
3129            OpportunityFactor::LackOfSegregation => 0.80,
3130            OpportunityFactor::ManagementOverride => 0.90,
3131            OpportunityFactor::ComplexTransactions => 0.70,
3132            OpportunityFactor::RelatedPartyTransactions => 0.75,
3133            OpportunityFactor::PoorToneAtTop => 0.85,
3134            OpportunityFactor::InadequateSupervision => 0.75,
3135            OpportunityFactor::AssetAccess => 0.70,
3136            OpportunityFactor::PoorRecordKeeping => 0.65,
3137            OpportunityFactor::LackOfDiscipline => 0.60,
3138            OpportunityFactor::LackOfIndependentChecks => 0.75,
3139        }
3140    }
3141}
3142
/// Rationalizations used by fraud perpetrators.
///
/// This is the "rationalization" corner of [`FraudTriangle`]. Each variant's
/// doc comment quotes the justification it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rationalization {
    /// "I'm just borrowing; I'll pay it back."
    TemporaryBorrowing,
    /// "Everyone does it."
    EveryoneDoesIt,
    /// "It's for the good of the company."
    ForTheCompanyGood,
    /// "I deserve this; the company owes me."
    Entitlement,
    /// "I was just following orders."
    FollowingOrders,
    /// "They won't miss it; they have plenty."
    TheyWontMissIt,
    /// "I need it more than they do."
    NeedItMore,
    /// "It's not really stealing."
    NotReallyStealing,
    /// "I'm underpaid for what I do."
    Underpaid,
    /// "It's a victimless crime."
    VictimlessCrime,
}
3167
3168impl Rationalization {
3169    /// Returns the risk weight (0.0-1.0) for this rationalization.
3170    pub fn risk_weight(&self) -> f64 {
3171        match self {
3172            // More dangerous rationalizations
3173            Rationalization::Entitlement => 0.85,
3174            Rationalization::EveryoneDoesIt => 0.80,
3175            Rationalization::NotReallyStealing => 0.80,
3176            Rationalization::TheyWontMissIt => 0.75,
3177            // Medium risk
3178            Rationalization::Underpaid => 0.70,
3179            Rationalization::ForTheCompanyGood => 0.65,
3180            Rationalization::NeedItMore => 0.65,
3181            // Lower risk (still indicates fraud)
3182            Rationalization::TemporaryBorrowing => 0.60,
3183            Rationalization::FollowingOrders => 0.55,
3184            Rationalization::VictimlessCrime => 0.60,
3185        }
3186    }
3187}
3188
3189// ============================================================================
3190// NEAR-MISS TYPES
3191// ============================================================================
3192
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Each variant carries the details that make the case look suspicious.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Date difference from similar transaction.
        date_difference_days: u32,
        /// Original transaction ID.
        similar_transaction_id: String,
    },
    /// Amount just below approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Fraction of the threshold reached (0.0-1.0).
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Type of legitimate pattern.
        pattern_type: LegitimatePatternType,
        /// Business justification.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Days until correction.
        correction_lag_days: u32,
        /// Correction document ID.
        correction_document_id: String,
    },
}
3225
/// Types of unusual but legitimate business patterns.
///
/// Used as the `pattern_type` of [`NearMissPattern::UnusualLegitimate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3246
3247impl LegitimatePatternType {
3248    /// Returns a description of this pattern type.
3249    pub fn description(&self) -> &'static str {
3250        match self {
3251            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3252            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3253            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3254            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3255            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3256            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3257            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3258            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3259        }
3260    }
3261}
3262
3263/// What might trigger a false positive for this near-miss.
3264#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3265pub enum FalsePositiveTrigger {
3266    /// Amount is near threshold.
3267    AmountNearThreshold,
3268    /// Timing is unusual.
3269    UnusualTiming,
3270    /// Similar to existing transaction.
3271    SimilarTransaction,
3272    /// New counterparty.
3273    NewCounterparty,
3274    /// Account combination unusual.
3275    UnusualAccountCombination,
3276    /// Volume spike.
3277    VolumeSpike,
3278    /// Round amount.
3279    RoundAmount,
3280}
3281
3282/// Label for a near-miss case.
3283#[derive(Debug, Clone, Serialize, Deserialize)]
3284pub struct NearMissLabel {
3285    /// Document ID.
3286    pub document_id: String,
3287    /// The near-miss pattern.
3288    pub pattern: NearMissPattern,
3289    /// How suspicious it appears (0.0-1.0).
3290    pub suspicion_score: f64,
3291    /// What would trigger a false positive.
3292    pub false_positive_trigger: FalsePositiveTrigger,
3293    /// Why this is actually legitimate.
3294    pub explanation: String,
3295}
3296
3297impl NearMissLabel {
3298    /// Creates a new near-miss label.
3299    pub fn new(
3300        document_id: impl Into<String>,
3301        pattern: NearMissPattern,
3302        suspicion_score: f64,
3303        trigger: FalsePositiveTrigger,
3304        explanation: impl Into<String>,
3305    ) -> Self {
3306        Self {
3307            document_id: document_id.into(),
3308            pattern,
3309            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3310            false_positive_trigger: trigger,
3311            explanation: explanation.into(),
3312        }
3313    }
3314}
3315
3316/// Configuration for anomaly rates.
3317#[derive(Debug, Clone, Serialize, Deserialize)]
3318pub struct AnomalyRateConfig {
3319    /// Overall anomaly rate (0.0 - 1.0).
3320    pub total_rate: f64,
3321    /// Fraud rate as proportion of anomalies.
3322    pub fraud_rate: f64,
3323    /// Error rate as proportion of anomalies.
3324    pub error_rate: f64,
3325    /// Process issue rate as proportion of anomalies.
3326    pub process_issue_rate: f64,
3327    /// Statistical anomaly rate as proportion of anomalies.
3328    pub statistical_rate: f64,
3329    /// Relational anomaly rate as proportion of anomalies.
3330    pub relational_rate: f64,
3331}
3332
3333impl Default for AnomalyRateConfig {
3334    fn default() -> Self {
3335        Self {
3336            total_rate: 0.02,         // 2% of transactions are anomalous
3337            fraud_rate: 0.25,         // 25% of anomalies are fraud
3338            error_rate: 0.35,         // 35% of anomalies are errors
3339            process_issue_rate: 0.20, // 20% are process issues
3340            statistical_rate: 0.15,   // 15% are statistical
3341            relational_rate: 0.05,    // 5% are relational
3342        }
3343    }
3344}
3345
3346impl AnomalyRateConfig {
3347    /// Validates that rates sum to approximately 1.0.
3348    pub fn validate(&self) -> Result<(), String> {
3349        let sum = self.fraud_rate
3350            + self.error_rate
3351            + self.process_issue_rate
3352            + self.statistical_rate
3353            + self.relational_rate;
3354
3355        if (sum - 1.0).abs() > 0.01 {
3356            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3357        }
3358
3359        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3360            return Err(format!(
3361                "Total rate must be between 0.0 and 1.0, got {}",
3362                self.total_rate
3363            ));
3364        }
3365
3366        Ok(())
3367    }
3368}
3369
3370#[cfg(test)]
3371#[allow(clippy::unwrap_used)]
3372mod tests {
3373    use super::*;
3374    use rust_decimal_macros::dec;
3375
3376    #[test]
3377    fn test_anomaly_type_category() {
3378        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3379        assert_eq!(fraud.category(), "Fraud");
3380        assert!(fraud.is_intentional());
3381
3382        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3383        assert_eq!(error.category(), "Error");
3384        assert!(!error.is_intentional());
3385    }
3386
3387    #[test]
3388    fn test_labeled_anomaly() {
3389        let anomaly = LabeledAnomaly::new(
3390            "ANO001".to_string(),
3391            AnomalyType::Fraud(FraudType::SelfApproval),
3392            "JE001".to_string(),
3393            "JE".to_string(),
3394            "1000".to_string(),
3395            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3396        )
3397        .with_description("User approved their own expense report")
3398        .with_related_entity("USER001");
3399
3400        assert_eq!(anomaly.severity, 3);
3401        assert!(anomaly.is_injected);
3402        assert_eq!(anomaly.related_entities.len(), 1);
3403    }
3404
3405    #[test]
3406    fn test_labeled_anomaly_with_provenance() {
3407        let anomaly = LabeledAnomaly::new(
3408            "ANO001".to_string(),
3409            AnomalyType::Fraud(FraudType::SelfApproval),
3410            "JE001".to_string(),
3411            "JE".to_string(),
3412            "1000".to_string(),
3413            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3414        )
3415        .with_run_id("run-123")
3416        .with_generation_seed(42)
3417        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3418        .with_structured_strategy(InjectionStrategy::SelfApproval {
3419            user_id: "USER001".to_string(),
3420        })
3421        .with_scenario("scenario-001")
3422        .with_original_document_hash("abc123");
3423
3424        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3425        assert_eq!(anomaly.generation_seed, Some(42));
3426        assert!(anomaly.causal_reason.is_some());
3427        assert!(anomaly.structured_strategy.is_some());
3428        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3429        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3430
3431        // Check that legacy injection_strategy is also set
3432        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3433    }
3434
3435    #[test]
3436    fn test_labeled_anomaly_derivation_chain() {
3437        let parent = LabeledAnomaly::new(
3438            "ANO001".to_string(),
3439            AnomalyType::Fraud(FraudType::DuplicatePayment),
3440            "JE001".to_string(),
3441            "JE".to_string(),
3442            "1000".to_string(),
3443            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3444        );
3445
3446        let child = LabeledAnomaly::new(
3447            "ANO002".to_string(),
3448            AnomalyType::Error(ErrorType::DuplicateEntry),
3449            "JE002".to_string(),
3450            "JE".to_string(),
3451            "1000".to_string(),
3452            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3453        )
3454        .with_parent_anomaly(&parent.anomaly_id);
3455
3456        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3457    }
3458
3459    #[test]
3460    fn test_injection_strategy_description() {
3461        let strategy = InjectionStrategy::AmountManipulation {
3462            original: dec!(1000),
3463            factor: 2.5,
3464        };
3465        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3466        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3467
3468        let strategy = InjectionStrategy::ThresholdAvoidance {
3469            threshold: dec!(10000),
3470            adjusted_amount: dec!(9999),
3471        };
3472        assert_eq!(
3473            strategy.description(),
3474            "Amount adjusted to avoid 10000 threshold"
3475        );
3476
3477        let strategy = InjectionStrategy::DateShift {
3478            days_shifted: -5,
3479            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3480        };
3481        assert_eq!(strategy.description(), "Date backdated by 5 days");
3482
3483        let strategy = InjectionStrategy::DateShift {
3484            days_shifted: 3,
3485            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3486        };
3487        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3488    }
3489
3490    #[test]
3491    fn test_causal_reason_variants() {
3492        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3493        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3494            assert!((base_rate - 0.02).abs() < 0.001);
3495        }
3496
3497        let reason = AnomalyCausalReason::TemporalPattern {
3498            pattern_name: "year_end_spike".to_string(),
3499        };
3500        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3501            assert_eq!(pattern_name, "year_end_spike");
3502        }
3503
3504        let reason = AnomalyCausalReason::ScenarioStep {
3505            scenario_type: "kickback".to_string(),
3506            step_number: 3,
3507        };
3508        if let AnomalyCausalReason::ScenarioStep {
3509            scenario_type,
3510            step_number,
3511        } = reason
3512        {
3513            assert_eq!(scenario_type, "kickback");
3514            assert_eq!(step_number, 3);
3515        }
3516    }
3517
3518    #[test]
3519    fn test_feature_vector_length() {
3520        let anomaly = LabeledAnomaly::new(
3521            "ANO001".to_string(),
3522            AnomalyType::Fraud(FraudType::SelfApproval),
3523            "JE001".to_string(),
3524            "JE".to_string(),
3525            "1000".to_string(),
3526            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3527        );
3528
3529        let features = anomaly.to_features();
3530        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3531        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3532    }
3533
3534    #[test]
3535    fn test_feature_vector_with_provenance() {
3536        let anomaly = LabeledAnomaly::new(
3537            "ANO001".to_string(),
3538            AnomalyType::Fraud(FraudType::SelfApproval),
3539            "JE001".to_string(),
3540            "JE".to_string(),
3541            "1000".to_string(),
3542            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3543        )
3544        .with_scenario("scenario-001")
3545        .with_parent_anomaly("ANO000");
3546
3547        let features = anomaly.to_features();
3548
3549        // Last two features should be 1.0 (has scenario, has parent)
3550        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3551        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3552    }
3553
3554    #[test]
3555    fn test_anomaly_summary() {
3556        let anomalies = vec![
3557            LabeledAnomaly::new(
3558                "ANO001".to_string(),
3559                AnomalyType::Fraud(FraudType::SelfApproval),
3560                "JE001".to_string(),
3561                "JE".to_string(),
3562                "1000".to_string(),
3563                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3564            ),
3565            LabeledAnomaly::new(
3566                "ANO002".to_string(),
3567                AnomalyType::Error(ErrorType::DuplicateEntry),
3568                "JE002".to_string(),
3569                "JE".to_string(),
3570                "1000".to_string(),
3571                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3572            ),
3573        ];
3574
3575        let summary = AnomalySummary::from_anomalies(&anomalies);
3576
3577        assert_eq!(summary.total_count, 2);
3578        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3579        assert_eq!(summary.by_category.get("Error"), Some(&1));
3580    }
3581
3582    #[test]
3583    fn test_rate_config_validation() {
3584        let config = AnomalyRateConfig::default();
3585        assert!(config.validate().is_ok());
3586
3587        let bad_config = AnomalyRateConfig {
3588            fraud_rate: 0.5,
3589            error_rate: 0.5,
3590            process_issue_rate: 0.5, // Sum > 1.0
3591            ..Default::default()
3592        };
3593        assert!(bad_config.validate().is_err());
3594    }
3595
3596    #[test]
3597    fn test_injection_strategy_serialization() {
3598        let strategy = InjectionStrategy::SoDViolation {
3599            duty1: "CreatePO".to_string(),
3600            duty2: "ApprovePO".to_string(),
3601            violating_user: "USER001".to_string(),
3602        };
3603
3604        let json = serde_json::to_string(&strategy).unwrap();
3605        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3606
3607        assert_eq!(strategy, deserialized);
3608    }
3609
3610    #[test]
3611    fn test_labeled_anomaly_serialization_with_provenance() {
3612        let anomaly = LabeledAnomaly::new(
3613            "ANO001".to_string(),
3614            AnomalyType::Fraud(FraudType::SelfApproval),
3615            "JE001".to_string(),
3616            "JE".to_string(),
3617            "1000".to_string(),
3618            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3619        )
3620        .with_run_id("run-123")
3621        .with_generation_seed(42)
3622        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3623
3624        let json = serde_json::to_string(&anomaly).unwrap();
3625        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3626
3627        assert_eq!(anomaly.run_id, deserialized.run_id);
3628        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3629    }
3630
3631    // ========================================
3632    // FR-003 ENHANCED TAXONOMY TESTS
3633    // ========================================
3634
3635    #[test]
3636    fn test_anomaly_category_from_anomaly_type() {
3637        // Fraud mappings
3638        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3639        assert_eq!(
3640            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3641            AnomalyCategory::FictitiousVendor
3642        );
3643
3644        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3645        assert_eq!(
3646            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3647            AnomalyCategory::VendorKickback
3648        );
3649
3650        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3651        assert_eq!(
3652            AnomalyCategory::from_anomaly_type(&fraud_structured),
3653            AnomalyCategory::StructuredTransaction
3654        );
3655
3656        // Error mappings
3657        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3658        assert_eq!(
3659            AnomalyCategory::from_anomaly_type(&error_duplicate),
3660            AnomalyCategory::DuplicatePayment
3661        );
3662
3663        // Process issue mappings
3664        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3665        assert_eq!(
3666            AnomalyCategory::from_anomaly_type(&process_skip),
3667            AnomalyCategory::MissingApproval
3668        );
3669
3670        // Relational mappings
3671        let relational_circular =
3672            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3673        assert_eq!(
3674            AnomalyCategory::from_anomaly_type(&relational_circular),
3675            AnomalyCategory::CircularFlow
3676        );
3677    }
3678
3679    #[test]
3680    fn test_anomaly_category_ordinal() {
3681        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3682        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3683        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3684    }
3685
3686    #[test]
3687    fn test_contributing_factor() {
3688        let factor = ContributingFactor::new(
3689            FactorType::AmountDeviation,
3690            15000.0,
3691            10000.0,
3692            true,
3693            0.5,
3694            "Amount exceeds threshold",
3695        );
3696
3697        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3698        assert_eq!(factor.value, 15000.0);
3699        assert_eq!(factor.threshold, 10000.0);
3700        assert!(factor.direction_greater);
3701
3702        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3703        let contribution = factor.contribution();
3704        assert!((contribution - 0.25).abs() < 0.01);
3705    }
3706
3707    #[test]
3708    fn test_contributing_factor_with_evidence() {
3709        let mut data = HashMap::new();
3710        data.insert("expected".to_string(), "10000".to_string());
3711        data.insert("actual".to_string(), "15000".to_string());
3712
3713        let factor = ContributingFactor::new(
3714            FactorType::AmountDeviation,
3715            15000.0,
3716            10000.0,
3717            true,
3718            0.5,
3719            "Amount deviation detected",
3720        )
3721        .with_evidence("transaction_history", data);
3722
3723        assert!(factor.evidence.is_some());
3724        let evidence = factor.evidence.unwrap();
3725        assert_eq!(evidence.source, "transaction_history");
3726        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3727    }
3728
3729    #[test]
3730    fn test_enhanced_anomaly_label() {
3731        let base = LabeledAnomaly::new(
3732            "ANO001".to_string(),
3733            AnomalyType::Fraud(FraudType::DuplicatePayment),
3734            "JE001".to_string(),
3735            "JE".to_string(),
3736            "1000".to_string(),
3737            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3738        );
3739
3740        let enhanced = EnhancedAnomalyLabel::from_base(base)
3741            .with_confidence(0.85)
3742            .with_severity(0.7)
3743            .with_factor(ContributingFactor::new(
3744                FactorType::DuplicateIndicator,
3745                1.0,
3746                0.5,
3747                true,
3748                0.4,
3749                "Duplicate payment detected",
3750            ))
3751            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3752
3753        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3754        assert_eq!(enhanced.enhanced_confidence, 0.85);
3755        assert_eq!(enhanced.enhanced_severity, 0.7);
3756        assert_eq!(enhanced.contributing_factors.len(), 1);
3757        assert_eq!(enhanced.secondary_categories.len(), 1);
3758    }
3759
3760    #[test]
3761    fn test_enhanced_anomaly_label_features() {
3762        let base = LabeledAnomaly::new(
3763            "ANO001".to_string(),
3764            AnomalyType::Fraud(FraudType::SelfApproval),
3765            "JE001".to_string(),
3766            "JE".to_string(),
3767            "1000".to_string(),
3768            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3769        );
3770
3771        let enhanced = EnhancedAnomalyLabel::from_base(base)
3772            .with_confidence(0.9)
3773            .with_severity(0.8)
3774            .with_factor(ContributingFactor::new(
3775                FactorType::ControlBypass,
3776                1.0,
3777                0.0,
3778                true,
3779                0.5,
3780                "Control bypass detected",
3781            ));
3782
3783        let features = enhanced.to_features();
3784
3785        // Should have 25 features (15 base + 10 enhanced)
3786        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3787        assert_eq!(features.len(), 25);
3788
3789        // Check enhanced confidence is in features
3790        assert_eq!(features[15], 0.9); // enhanced_confidence
3791
3792        // Check has_control_bypass flag
3793        assert_eq!(features[21], 1.0); // has_control_bypass
3794    }
3795
3796    #[test]
3797    fn test_enhanced_anomaly_label_feature_names() {
3798        let names = EnhancedAnomalyLabel::feature_names();
3799        assert_eq!(names.len(), 25);
3800        assert!(names.contains(&"enhanced_confidence"));
3801        assert!(names.contains(&"enhanced_severity"));
3802        assert!(names.contains(&"has_control_bypass"));
3803    }
3804
3805    #[test]
3806    fn test_factor_type_names() {
3807        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3808        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3809        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3810    }
3811
3812    #[test]
3813    fn test_anomaly_category_serialization() {
3814        let category = AnomalyCategory::CircularFlow;
3815        let json = serde_json::to_string(&category).unwrap();
3816        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3817        assert_eq!(category, deserialized);
3818
3819        let custom = AnomalyCategory::Custom("custom_type".to_string());
3820        let json = serde_json::to_string(&custom).unwrap();
3821        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3822        assert_eq!(custom, deserialized);
3823    }
3824
3825    #[test]
3826    fn test_enhanced_label_secondary_category_dedup() {
3827        let base = LabeledAnomaly::new(
3828            "ANO001".to_string(),
3829            AnomalyType::Fraud(FraudType::DuplicatePayment),
3830            "JE001".to_string(),
3831            "JE".to_string(),
3832            "1000".to_string(),
3833            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3834        );
3835
3836        let enhanced = EnhancedAnomalyLabel::from_base(base)
3837            // Try to add the primary category as secondary (should be ignored)
3838            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3839            // Add a valid secondary
3840            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3841            // Try to add duplicate secondary (should be ignored)
3842            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3843
3844        // Should only have 1 secondary category (TimingAnomaly)
3845        assert_eq!(enhanced.secondary_categories.len(), 1);
3846        assert_eq!(
3847            enhanced.secondary_categories[0],
3848            AnomalyCategory::TimingAnomaly
3849        );
3850    }
3851
3852    // ==========================================================================
3853    // Accounting Standards Fraud Type Tests
3854    // ==========================================================================
3855
3856    #[test]
3857    fn test_revenue_recognition_fraud_types() {
3858        // Test ASC 606/IFRS 15 related fraud types
3859        let fraud_types = [
3860            FraudType::ImproperRevenueRecognition,
3861            FraudType::ImproperPoAllocation,
3862            FraudType::VariableConsiderationManipulation,
3863            FraudType::ContractModificationMisstatement,
3864        ];
3865
3866        for fraud_type in fraud_types {
3867            let anomaly_type = AnomalyType::Fraud(fraud_type);
3868            assert_eq!(anomaly_type.category(), "Fraud");
3869            assert!(anomaly_type.is_intentional());
3870            assert!(anomaly_type.severity() >= 3);
3871        }
3872    }
3873
3874    #[test]
3875    fn test_lease_accounting_fraud_types() {
3876        // Test ASC 842/IFRS 16 related fraud types
3877        let fraud_types = [
3878            FraudType::LeaseClassificationManipulation,
3879            FraudType::OffBalanceSheetLease,
3880            FraudType::LeaseLiabilityUnderstatement,
3881            FraudType::RouAssetMisstatement,
3882        ];
3883
3884        for fraud_type in fraud_types {
3885            let anomaly_type = AnomalyType::Fraud(fraud_type);
3886            assert_eq!(anomaly_type.category(), "Fraud");
3887            assert!(anomaly_type.is_intentional());
3888            assert!(anomaly_type.severity() >= 3);
3889        }
3890
3891        // Off-balance sheet lease fraud should be high severity
3892        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3893    }
3894
3895    #[test]
3896    fn test_fair_value_fraud_types() {
3897        // Test ASC 820/IFRS 13 related fraud types
3898        let fraud_types = [
3899            FraudType::FairValueHierarchyManipulation,
3900            FraudType::Level3InputManipulation,
3901            FraudType::ValuationTechniqueManipulation,
3902        ];
3903
3904        for fraud_type in fraud_types {
3905            let anomaly_type = AnomalyType::Fraud(fraud_type);
3906            assert_eq!(anomaly_type.category(), "Fraud");
3907            assert!(anomaly_type.is_intentional());
3908            assert!(anomaly_type.severity() >= 4);
3909        }
3910
3911        // Level 3 manipulation is highest severity (unobservable inputs)
3912        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3913    }
3914
3915    #[test]
3916    fn test_impairment_fraud_types() {
3917        // Test ASC 360/IAS 36 related fraud types
3918        let fraud_types = [
3919            FraudType::DelayedImpairment,
3920            FraudType::ImpairmentTestAvoidance,
3921            FraudType::CashFlowProjectionManipulation,
3922            FraudType::ImproperImpairmentReversal,
3923        ];
3924
3925        for fraud_type in fraud_types {
3926            let anomaly_type = AnomalyType::Fraud(fraud_type);
3927            assert_eq!(anomaly_type.category(), "Fraud");
3928            assert!(anomaly_type.is_intentional());
3929            assert!(anomaly_type.severity() >= 3);
3930        }
3931
3932        // Cash flow manipulation has highest severity
3933        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3934    }
3935
3936    // ==========================================================================
3937    // Accounting Standards Error Type Tests
3938    // ==========================================================================
3939
3940    #[test]
3941    fn test_standards_error_types() {
3942        // Test non-fraudulent accounting standards errors
3943        let error_types = [
3944            ErrorType::RevenueTimingError,
3945            ErrorType::PoAllocationError,
3946            ErrorType::LeaseClassificationError,
3947            ErrorType::LeaseCalculationError,
3948            ErrorType::FairValueError,
3949            ErrorType::ImpairmentCalculationError,
3950            ErrorType::DiscountRateError,
3951            ErrorType::FrameworkApplicationError,
3952        ];
3953
3954        for error_type in error_types {
3955            let anomaly_type = AnomalyType::Error(error_type);
3956            assert_eq!(anomaly_type.category(), "Error");
3957            assert!(!anomaly_type.is_intentional());
3958            assert!(anomaly_type.severity() >= 3);
3959        }
3960    }
3961
3962    #[test]
3963    fn test_framework_application_error() {
3964        // Test IFRS vs GAAP confusion errors
3965        let error_type = ErrorType::FrameworkApplicationError;
3966        assert_eq!(error_type.severity(), 4);
3967
3968        let anomaly = LabeledAnomaly::new(
3969            "ERR001".to_string(),
3970            AnomalyType::Error(error_type),
3971            "JE100".to_string(),
3972            "JE".to_string(),
3973            "1000".to_string(),
3974            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3975        )
3976        .with_description("LIFO inventory method used under IFRS (not permitted)")
3977        .with_metadata("framework", "IFRS")
3978        .with_metadata("standard_violated", "IAS 2");
3979
3980        assert_eq!(anomaly.anomaly_type.category(), "Error");
3981        assert_eq!(
3982            anomaly.metadata.get("standard_violated"),
3983            Some(&"IAS 2".to_string())
3984        );
3985    }
3986
3987    #[test]
3988    fn test_standards_anomaly_serialization() {
3989        // Test that new fraud types serialize/deserialize correctly
3990        let fraud_types = [
3991            FraudType::ImproperRevenueRecognition,
3992            FraudType::LeaseClassificationManipulation,
3993            FraudType::FairValueHierarchyManipulation,
3994            FraudType::DelayedImpairment,
3995        ];
3996
3997        for fraud_type in fraud_types {
3998            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
3999            let deserialized: FraudType =
4000                serde_json::from_str(&json).expect("Failed to deserialize");
4001            assert_eq!(fraud_type, deserialized);
4002        }
4003
4004        // Test error types
4005        let error_types = [
4006            ErrorType::RevenueTimingError,
4007            ErrorType::LeaseCalculationError,
4008            ErrorType::FairValueError,
4009            ErrorType::FrameworkApplicationError,
4010        ];
4011
4012        for error_type in error_types {
4013            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4014            let deserialized: ErrorType =
4015                serde_json::from_str(&json).expect("Failed to deserialize");
4016            assert_eq!(error_type, deserialized);
4017        }
4018    }
4019
4020    #[test]
4021    fn test_standards_labeled_anomaly() {
4022        // Test creating a labeled anomaly for a standards violation
4023        let anomaly = LabeledAnomaly::new(
4024            "STD001".to_string(),
4025            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4026            "CONTRACT-2024-001".to_string(),
4027            "Revenue".to_string(),
4028            "1000".to_string(),
4029            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4030        )
4031        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4032        .with_monetary_impact(dec!(500000))
4033        .with_metadata("standard", "ASC 606")
4034        .with_metadata("paragraph", "606-10-25-1")
4035        .with_metadata("contract_id", "C-2024-001")
4036        .with_related_entity("CONTRACT-2024-001")
4037        .with_related_entity("CUSTOMER-500");
4038
4039        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4040        assert!(anomaly.is_injected);
4041        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4042        assert_eq!(anomaly.related_entities.len(), 2);
4043        assert_eq!(
4044            anomaly.metadata.get("standard"),
4045            Some(&"ASC 606".to_string())
4046        );
4047    }
4048
4049    // ==========================================================================
4050    // Multi-Dimensional Labeling Tests
4051    // ==========================================================================
4052
4053    #[test]
4054    fn test_severity_level() {
4055        assert_eq!(SeverityLevel::Low.numeric(), 1);
4056        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4057
4058        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4059        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4060
4061        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4062        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4063
4064        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4065    }
4066
4067    #[test]
4068    fn test_anomaly_severity() {
4069        let severity =
4070            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4071
4072        assert_eq!(severity.level, SeverityLevel::High);
4073        assert!(severity.is_material);
4074        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4075
4076        // Not material
4077        let low_severity =
4078            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4079        assert!(!low_severity.is_material);
4080    }
4081
4082    #[test]
4083    fn test_detection_difficulty() {
4084        assert!(
4085            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4086        );
4087        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4088
4089        assert_eq!(
4090            AnomalyDetectionDifficulty::from_score(0.05),
4091            AnomalyDetectionDifficulty::Trivial
4092        );
4093        assert_eq!(
4094            AnomalyDetectionDifficulty::from_score(0.90),
4095            AnomalyDetectionDifficulty::Expert
4096        );
4097
4098        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4099    }
4100
4101    #[test]
4102    fn test_ground_truth_certainty() {
4103        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4104        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4105        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4106    }
4107
4108    #[test]
4109    fn test_detection_method() {
4110        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4111        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4112    }
4113
4114    #[test]
4115    fn test_extended_anomaly_label() {
4116        let base = LabeledAnomaly::new(
4117            "ANO001".to_string(),
4118            AnomalyType::Fraud(FraudType::FictitiousVendor),
4119            "JE001".to_string(),
4120            "JE".to_string(),
4121            "1000".to_string(),
4122            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4123        )
4124        .with_monetary_impact(dec!(100000));
4125
4126        let extended = ExtendedAnomalyLabel::from_base(base)
4127            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4128            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4129            .with_method(DetectionMethod::GraphBased)
4130            .with_method(DetectionMethod::ForensicAudit)
4131            .with_indicator("New vendor with no history")
4132            .with_indicator("Large first transaction")
4133            .with_certainty(GroundTruthCertainty::Definite)
4134            .with_entity("V001")
4135            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4136            .with_scheme("SCHEME001", 2);
4137
4138        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4139        assert_eq!(
4140            extended.detection_difficulty,
4141            AnomalyDetectionDifficulty::Hard
4142        );
4143        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4144        assert_eq!(extended.recommended_methods.len(), 3);
4145        assert_eq!(extended.key_indicators.len(), 2);
4146        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4147        assert_eq!(extended.scheme_stage, Some(2));
4148    }
4149
4150    #[test]
4151    fn test_extended_anomaly_label_features() {
4152        let base = LabeledAnomaly::new(
4153            "ANO001".to_string(),
4154            AnomalyType::Fraud(FraudType::SelfApproval),
4155            "JE001".to_string(),
4156            "JE".to_string(),
4157            "1000".to_string(),
4158            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4159        );
4160
4161        let extended =
4162            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4163
4164        let features = extended.to_features();
4165        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4166        assert_eq!(features.len(), 30);
4167
4168        // Check difficulty score is in features
4169        let difficulty_idx = 18; // Position of difficulty_score
4170        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4171    }
4172
4173    #[test]
4174    fn test_extended_label_near_miss() {
4175        let base = LabeledAnomaly::new(
4176            "ANO001".to_string(),
4177            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4178            "JE001".to_string(),
4179            "JE".to_string(),
4180            "1000".to_string(),
4181            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4182        );
4183
4184        let extended = ExtendedAnomalyLabel::from_base(base)
4185            .as_near_miss("Year-end bonus payment, legitimately high");
4186
4187        assert!(extended.is_near_miss);
4188        assert!(extended.near_miss_explanation.is_some());
4189    }
4190
4191    #[test]
4192    fn test_scheme_type() {
4193        assert_eq!(
4194            SchemeType::GradualEmbezzlement.name(),
4195            "gradual_embezzlement"
4196        );
4197        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4198        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4199    }
4200
4201    #[test]
4202    fn test_concealment_technique() {
4203        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4204        assert!(
4205            ConcealmentTechnique::Collusion.difficulty_bonus()
4206                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4207        );
4208    }
4209
4210    #[test]
4211    fn test_near_miss_label() {
4212        let near_miss = NearMissLabel::new(
4213            "JE001",
4214            NearMissPattern::ThresholdProximity {
4215                threshold: dec!(10000),
4216                proximity: 0.95,
4217            },
4218            0.7,
4219            FalsePositiveTrigger::AmountNearThreshold,
4220            "Transaction is 95% of threshold but business justified",
4221        );
4222
4223        assert_eq!(near_miss.document_id, "JE001");
4224        assert_eq!(near_miss.suspicion_score, 0.7);
4225        assert_eq!(
4226            near_miss.false_positive_trigger,
4227            FalsePositiveTrigger::AmountNearThreshold
4228        );
4229    }
4230
4231    #[test]
4232    fn test_legitimate_pattern_type() {
4233        assert_eq!(
4234            LegitimatePatternType::YearEndBonus.description(),
4235            "Year-end bonus payment"
4236        );
4237        assert_eq!(
4238            LegitimatePatternType::InsuranceClaim.description(),
4239            "Insurance claim reimbursement"
4240        );
4241    }
4242
4243    #[test]
4244    fn test_severity_detection_difficulty_serialization() {
4245        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4246        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4247        let deserialized: AnomalySeverity =
4248            serde_json::from_str(&json).expect("Failed to deserialize");
4249        assert_eq!(severity.level, deserialized.level);
4250
4251        let difficulty = AnomalyDetectionDifficulty::Hard;
4252        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4253        let deserialized: AnomalyDetectionDifficulty =
4254            serde_json::from_str(&json).expect("Failed to deserialize");
4255        assert_eq!(difficulty, deserialized);
4256    }
4257
4258    // ========================================
4259    // ACFE Taxonomy Tests
4260    // ========================================
4261
4262    #[test]
4263    fn test_acfe_fraud_category() {
4264        let asset = AcfeFraudCategory::AssetMisappropriation;
4265        assert_eq!(asset.name(), "asset_misappropriation");
4266        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4267        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4268        assert_eq!(asset.typical_detection_months(), 12);
4269
4270        let corruption = AcfeFraudCategory::Corruption;
4271        assert_eq!(corruption.name(), "corruption");
4272        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4273
4274        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4275        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4276        assert_eq!(fs_fraud.typical_detection_months(), 24);
4277    }
4278
4279    #[test]
4280    fn test_cash_fraud_scheme() {
4281        let shell = CashFraudScheme::ShellCompany;
4282        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4283        assert_eq!(shell.subcategory(), "billing_schemes");
4284        assert_eq!(shell.severity(), 5);
4285        assert_eq!(
4286            shell.detection_difficulty(),
4287            AnomalyDetectionDifficulty::Hard
4288        );
4289
4290        let ghost = CashFraudScheme::GhostEmployee;
4291        assert_eq!(ghost.subcategory(), "payroll_schemes");
4292        assert_eq!(ghost.severity(), 5);
4293
4294        // Test all variants exist
4295        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4296    }
4297
4298    #[test]
4299    fn test_asset_fraud_scheme() {
4300        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4301        assert_eq!(
4302            ip_theft.category(),
4303            AcfeFraudCategory::AssetMisappropriation
4304        );
4305        assert_eq!(ip_theft.subcategory(), "other_assets");
4306        assert_eq!(ip_theft.severity(), 5);
4307
4308        let inv_theft = AssetFraudScheme::InventoryTheft;
4309        assert_eq!(inv_theft.subcategory(), "inventory");
4310        assert_eq!(inv_theft.severity(), 4);
4311    }
4312
4313    #[test]
4314    fn test_corruption_scheme() {
4315        let kickback = CorruptionScheme::InvoiceKickback;
4316        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4317        assert_eq!(kickback.subcategory(), "bribery");
4318        assert_eq!(kickback.severity(), 5);
4319        assert_eq!(
4320            kickback.detection_difficulty(),
4321            AnomalyDetectionDifficulty::Expert
4322        );
4323
4324        let bid_rigging = CorruptionScheme::BidRigging;
4325        assert_eq!(bid_rigging.subcategory(), "bribery");
4326        assert_eq!(
4327            bid_rigging.detection_difficulty(),
4328            AnomalyDetectionDifficulty::Hard
4329        );
4330
4331        let purchasing = CorruptionScheme::PurchasingConflict;
4332        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4333
4334        // Test all variants exist
4335        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4336    }
4337
4338    #[test]
4339    fn test_financial_statement_scheme() {
4340        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4341        assert_eq!(
4342            fictitious.category(),
4343            AcfeFraudCategory::FinancialStatementFraud
4344        );
4345        assert_eq!(fictitious.subcategory(), "overstatement");
4346        assert_eq!(fictitious.severity(), 5);
4347        assert_eq!(
4348            fictitious.detection_difficulty(),
4349            AnomalyDetectionDifficulty::Expert
4350        );
4351
4352        let understated = FinancialStatementScheme::UnderstatedRevenues;
4353        assert_eq!(understated.subcategory(), "understatement");
4354
4355        // Test all variants exist
4356        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4357    }
4358
4359    #[test]
4360    fn test_acfe_scheme_unified() {
4361        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4362        assert_eq!(
4363            cash_scheme.category(),
4364            AcfeFraudCategory::AssetMisappropriation
4365        );
4366        assert_eq!(cash_scheme.severity(), 5);
4367
4368        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4369        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4370
4371        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4372        assert_eq!(
4373            fs_scheme.category(),
4374            AcfeFraudCategory::FinancialStatementFraud
4375        );
4376    }
4377
4378    #[test]
4379    fn test_acfe_detection_method() {
4380        let tip = AcfeDetectionMethod::Tip;
4381        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4382
4383        let internal_audit = AcfeDetectionMethod::InternalAudit;
4384        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4385
4386        let external_audit = AcfeDetectionMethod::ExternalAudit;
4387        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4388
4389        // Test all variants exist
4390        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4391    }
4392
4393    #[test]
4394    fn test_perpetrator_department() {
4395        let accounting = PerpetratorDepartment::Accounting;
4396        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4397        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4398
4399        let executive = PerpetratorDepartment::Executive;
4400        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4401    }
4402
4403    #[test]
4404    fn test_perpetrator_level() {
4405        let employee = PerpetratorLevel::Employee;
4406        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4407        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4408
4409        let exec = PerpetratorLevel::OwnerExecutive;
4410        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4411    }
4412
4413    #[test]
4414    fn test_acfe_calibration() {
4415        let cal = AcfeCalibration::default();
4416        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4417        assert_eq!(cal.median_duration_months, 12);
4418        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4419        assert!(cal.validate().is_ok());
4420
4421        // Test custom calibration
4422        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4423        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4424        assert_eq!(custom_cal.median_duration_months, 18);
4425
4426        // Test validation failure
4427        let bad_cal = AcfeCalibration {
4428            collusion_rate: 1.5,
4429            ..Default::default()
4430        };
4431        assert!(bad_cal.validate().is_err());
4432    }
4433
4434    #[test]
4435    fn test_fraud_triangle() {
4436        let triangle = FraudTriangle::new(
4437            PressureType::FinancialTargets,
4438            vec![
4439                OpportunityFactor::WeakInternalControls,
4440                OpportunityFactor::ManagementOverride,
4441            ],
4442            Rationalization::ForTheCompanyGood,
4443        );
4444
4445        // Risk score should be between 0 and 1
4446        let risk = triangle.risk_score();
4447        assert!((0.0..=1.0).contains(&risk));
4448        // Should be relatively high given the components
4449        assert!(risk > 0.5);
4450    }
4451
4452    #[test]
4453    fn test_pressure_types() {
4454        let financial = PressureType::FinancialTargets;
4455        assert!(financial.risk_weight() > 0.5);
4456
4457        let gambling = PressureType::GamblingAddiction;
4458        assert_eq!(gambling.risk_weight(), 0.90);
4459    }
4460
4461    #[test]
4462    fn test_opportunity_factors() {
4463        let override_factor = OpportunityFactor::ManagementOverride;
4464        assert_eq!(override_factor.risk_weight(), 0.90);
4465
4466        let weak_controls = OpportunityFactor::WeakInternalControls;
4467        assert!(weak_controls.risk_weight() > 0.8);
4468    }
4469
4470    #[test]
4471    fn test_rationalizations() {
4472        let entitlement = Rationalization::Entitlement;
4473        assert!(entitlement.risk_weight() > 0.8);
4474
4475        let borrowing = Rationalization::TemporaryBorrowing;
4476        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4477    }
4478
4479    #[test]
4480    fn test_acfe_scheme_serialization() {
4481        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4482        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4483        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4484        assert_eq!(scheme, deserialized);
4485
4486        let calibration = AcfeCalibration::default();
4487        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4488        let deserialized: AcfeCalibration =
4489            serde_json::from_str(&json).expect("Failed to deserialize");
4490        assert_eq!(calibration.median_loss, deserialized.median_loss);
4491    }
4492}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs