// datasynth_core/models/anomaly.rs

//! Anomaly types and labels for synthetic data generation.
//!
//! This module provides comprehensive anomaly classification for:
//! - Fraud detection training
//! - Error detection systems
//! - Process compliance monitoring
//! - Statistical anomaly detection
//! - Graph-based anomaly detection

use chrono::{NaiveDate, NaiveDateTime};
use rust_decimal::Decimal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

15/// Causal reason explaining why an anomaly was injected.
16///
17/// This enables provenance tracking for understanding the "why" behind each anomaly.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20    /// Injected due to random rate selection.
21    RandomRate {
22        /// Base rate used for selection.
23        base_rate: f64,
24    },
25    /// Injected due to temporal pattern matching.
26    TemporalPattern {
27        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
28        pattern_name: String,
29    },
30    /// Injected based on entity targeting rules.
31    EntityTargeting {
32        /// Type of entity targeted (e.g., "vendor", "user", "account").
33        target_type: String,
34        /// ID of the targeted entity.
35        target_id: String,
36    },
37    /// Part of an anomaly cluster.
38    ClusterMembership {
39        /// ID of the cluster this anomaly belongs to.
40        cluster_id: String,
41    },
42    /// Part of a multi-step scenario.
43    ScenarioStep {
44        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
45        scenario_type: String,
46        /// Step number within the scenario.
47        step_number: u32,
48    },
49    /// Injected based on data quality profile.
50    DataQualityProfile {
51        /// Profile name (e.g., "noisy", "legacy", "clean").
52        profile: String,
53    },
54    /// Injected for ML training balance.
55    MLTrainingBalance {
56        /// Target class being balanced.
57        target_class: String,
58    },
59}
60
61/// Structured injection strategy with captured parameters.
62///
63/// Unlike the string-based `injection_strategy` field, this enum captures
64/// the exact parameters used during injection for full reproducibility.
65#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67    /// Amount was manipulated by a factor.
68    AmountManipulation {
69        /// Original amount before manipulation.
70        original: Decimal,
71        /// Multiplication factor applied.
72        factor: f64,
73    },
74    /// Amount adjusted to avoid a threshold.
75    ThresholdAvoidance {
76        /// Threshold being avoided.
77        threshold: Decimal,
78        /// Final amount after adjustment.
79        adjusted_amount: Decimal,
80    },
81    /// Date was backdated or forward-dated.
82    DateShift {
83        /// Number of days shifted (negative = backdated).
84        days_shifted: i32,
85        /// Original date before shift.
86        original_date: NaiveDate,
87    },
88    /// User approved their own transaction.
89    SelfApproval {
90        /// User who created and approved.
91        user_id: String,
92    },
93    /// Segregation of duties violation.
94    SoDViolation {
95        /// First duty involved.
96        duty1: String,
97        /// Second duty involved.
98        duty2: String,
99        /// User who performed both duties.
100        violating_user: String,
101    },
102    /// Exact duplicate of another document.
103    ExactDuplicate {
104        /// ID of the original document.
105        original_doc_id: String,
106    },
107    /// Near-duplicate with small variations.
108    NearDuplicate {
109        /// ID of the original document.
110        original_doc_id: String,
111        /// Fields that were varied.
112        varied_fields: Vec<String>,
113    },
114    /// Circular flow of funds/goods.
115    CircularFlow {
116        /// Chain of entities involved.
117        entity_chain: Vec<String>,
118    },
119    /// Split transaction to avoid threshold.
120    SplitTransaction {
121        /// Original total amount.
122        original_amount: Decimal,
123        /// Number of splits.
124        split_count: u32,
125        /// IDs of the split documents.
126        split_doc_ids: Vec<String>,
127    },
128    /// Round number manipulation.
129    RoundNumbering {
130        /// Original precise amount.
131        original_amount: Decimal,
132        /// Rounded amount.
133        rounded_amount: Decimal,
134    },
135    /// Timing manipulation (weekend, after-hours, etc.).
136    TimingManipulation {
137        /// Type of timing issue.
138        timing_type: String,
139        /// Original timestamp.
140        original_time: Option<NaiveDateTime>,
141    },
142    /// Account misclassification.
143    AccountMisclassification {
144        /// Correct account.
145        correct_account: String,
146        /// Incorrect account used.
147        incorrect_account: String,
148    },
149    /// Missing required field.
150    MissingField {
151        /// Name of the missing field.
152        field_name: String,
153    },
154    /// Custom injection strategy.
155    Custom {
156        /// Strategy name.
157        name: String,
158        /// Additional parameters.
159        parameters: HashMap<String, String>,
160    },
161}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {:.2}", factor)
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {} threshold", threshold)
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {} days", days_shifted)
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {}", user_id)
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {} and {}", duty1, duty2)
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {}", original_doc_id)
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!(
194                    "Near-duplicate of {} (varied: {:?})",
195                    original_doc_id, varied_fields
196                )
197            }
198            InjectionStrategy::CircularFlow { entity_chain } => {
199                format!("Circular flow through {} entities", entity_chain.len())
200            }
201            InjectionStrategy::SplitTransaction { split_count, .. } => {
202                format!("Split into {} transactions", split_count)
203            }
204            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
205            InjectionStrategy::TimingManipulation { timing_type, .. } => {
206                format!("Timing manipulation: {}", timing_type)
207            }
208            InjectionStrategy::AccountMisclassification {
209                correct_account,
210                incorrect_account,
211            } => {
212                format!(
213                    "Misclassified from {} to {}",
214                    correct_account, incorrect_account
215                )
216            }
217            InjectionStrategy::MissingField { field_name } => {
218                format!("Missing required field: {}", field_name)
219            }
220            InjectionStrategy::Custom { name, .. } => format!("Custom: {}", name),
221        }
222    }
223
224    /// Returns the strategy type name.
225    pub fn strategy_type(&self) -> &'static str {
226        match self {
227            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
228            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
229            InjectionStrategy::DateShift { .. } => "DateShift",
230            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
231            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
232            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
233            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
234            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
235            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
236            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
237            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
238            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
239            InjectionStrategy::MissingField { .. } => "MissingField",
240            InjectionStrategy::Custom { .. } => "Custom",
241        }
242    }
243}
244
245/// Primary anomaly classification.
246#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
247pub enum AnomalyType {
248    /// Fraudulent activity.
249    Fraud(FraudType),
250    /// Data entry or processing error.
251    Error(ErrorType),
252    /// Process or control issue.
253    ProcessIssue(ProcessIssueType),
254    /// Statistical anomaly.
255    Statistical(StatisticalAnomalyType),
256    /// Relational/graph anomaly.
257    Relational(RelationalAnomalyType),
258    /// Custom anomaly type.
259    Custom(String),
260}
261
262impl AnomalyType {
263    /// Returns the category name.
264    pub fn category(&self) -> &'static str {
265        match self {
266            AnomalyType::Fraud(_) => "Fraud",
267            AnomalyType::Error(_) => "Error",
268            AnomalyType::ProcessIssue(_) => "ProcessIssue",
269            AnomalyType::Statistical(_) => "Statistical",
270            AnomalyType::Relational(_) => "Relational",
271            AnomalyType::Custom(_) => "Custom",
272        }
273    }
274
275    /// Returns the specific type name.
276    pub fn type_name(&self) -> String {
277        match self {
278            AnomalyType::Fraud(t) => format!("{:?}", t),
279            AnomalyType::Error(t) => format!("{:?}", t),
280            AnomalyType::ProcessIssue(t) => format!("{:?}", t),
281            AnomalyType::Statistical(t) => format!("{:?}", t),
282            AnomalyType::Relational(t) => format!("{:?}", t),
283            AnomalyType::Custom(s) => s.clone(),
284        }
285    }
286
287    /// Returns the severity level (1-5, 5 being most severe).
288    pub fn severity(&self) -> u8 {
289        match self {
290            AnomalyType::Fraud(t) => t.severity(),
291            AnomalyType::Error(t) => t.severity(),
292            AnomalyType::ProcessIssue(t) => t.severity(),
293            AnomalyType::Statistical(t) => t.severity(),
294            AnomalyType::Relational(t) => t.severity(),
295            AnomalyType::Custom(_) => 3,
296        }
297    }
298
299    /// Returns whether this anomaly is typically intentional.
300    pub fn is_intentional(&self) -> bool {
301        matches!(self, AnomalyType::Fraud(_))
302    }
303}
304
305/// Fraud types for detection training.
306#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
307pub enum FraudType {
308    // Journal Entry Fraud
309    /// Fictitious journal entry with no business purpose.
310    FictitiousEntry,
311    /// Fictitious transaction (alias for FictitiousEntry).
312    FictitiousTransaction,
313    /// Round-dollar amounts suggesting manual manipulation.
314    RoundDollarManipulation,
315    /// Entry posted just below approval threshold.
316    JustBelowThreshold,
317    /// Revenue recognition manipulation.
318    RevenueManipulation,
319    /// Expense capitalization fraud.
320    ImproperCapitalization,
321    /// Improperly capitalizing expenses as assets.
322    ExpenseCapitalization,
323    /// Cookie jar reserves manipulation.
324    ReserveManipulation,
325    /// Round-tripping funds through suspense/clearing accounts.
326    SuspenseAccountAbuse,
327    /// Splitting transactions to stay below approval thresholds.
328    SplitTransaction,
329    /// Unusual timing (weekend, holiday, after-hours postings).
330    TimingAnomaly,
331    /// Posting to unauthorized accounts.
332    UnauthorizedAccess,
333
334    // Approval Fraud
335    /// User approving their own request.
336    SelfApproval,
337    /// Approval beyond authorized limit.
338    ExceededApprovalLimit,
339    /// Segregation of duties violation.
340    SegregationOfDutiesViolation,
341    /// Approval by unauthorized user.
342    UnauthorizedApproval,
343    /// Collusion between approver and requester.
344    CollusiveApproval,
345
346    // Vendor/Payment Fraud
347    /// Fictitious vendor.
348    FictitiousVendor,
349    /// Duplicate payment to vendor.
350    DuplicatePayment,
351    /// Payment to shell company.
352    ShellCompanyPayment,
353    /// Kickback scheme.
354    Kickback,
355    /// Kickback scheme (alias).
356    KickbackScheme,
357    /// Invoice manipulation.
358    InvoiceManipulation,
359
360    // Asset Fraud
361    /// Misappropriation of assets.
362    AssetMisappropriation,
363    /// Inventory theft.
364    InventoryTheft,
365    /// Ghost employee.
366    GhostEmployee,
367
368    // Financial Statement Fraud
369    /// Premature revenue recognition.
370    PrematureRevenue,
371    /// Understated liabilities.
372    UnderstatedLiabilities,
373    /// Overstated assets.
374    OverstatedAssets,
375    /// Channel stuffing.
376    ChannelStuffing,
377
378    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
379    /// Improper revenue recognition timing (ASC 606/IFRS 15).
380    ImproperRevenueRecognition,
381    /// Multiple performance obligations not properly separated.
382    ImproperPoAllocation,
383    /// Variable consideration not properly estimated.
384    VariableConsiderationManipulation,
385    /// Contract modifications not properly accounted for.
386    ContractModificationMisstatement,
387
388    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
389    /// Lease classification manipulation (operating vs finance).
390    LeaseClassificationManipulation,
391    /// Off-balance sheet lease fraud.
392    OffBalanceSheetLease,
393    /// Lease liability understatement.
394    LeaseLiabilityUnderstatement,
395    /// ROU asset misstatement.
396    RouAssetMisstatement,
397
398    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
399    /// Fair value hierarchy misclassification.
400    FairValueHierarchyManipulation,
401    /// Level 3 input manipulation.
402    Level3InputManipulation,
403    /// Valuation technique manipulation.
404    ValuationTechniqueManipulation,
405
406    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
407    /// Delayed impairment recognition.
408    DelayedImpairment,
409    /// Improperly avoiding impairment testing.
410    ImpairmentTestAvoidance,
411    /// Cash flow projection manipulation for impairment.
412    CashFlowProjectionManipulation,
413    /// Improper impairment reversal (IFRS only).
414    ImproperImpairmentReversal,
415
416    // Sourcing/Procurement Fraud (S2C)
417    /// Bid rigging or collusion among bidders.
418    BidRigging,
419    /// Contracts with phantom/shell vendors.
420    PhantomVendorContract,
421    /// Splitting contracts to avoid approval thresholds.
422    SplitContractThreshold,
423    /// Conflict of interest in sourcing decisions.
424    ConflictOfInterestSourcing,
425
426    // HR/Payroll Fraud (H2R)
427    /// Ghost employee on payroll.
428    GhostEmployeePayroll,
429    /// Payroll inflation/unauthorized raises.
430    PayrollInflation,
431    /// Duplicate expense report submission.
432    DuplicateExpenseReport,
433    /// Fictitious expense claims.
434    FictitiousExpense,
435    /// Splitting expenses to avoid approval threshold.
436    SplitExpenseToAvoidApproval,
437
438    // O2C Fraud
439    /// Revenue timing manipulation via quotes.
440    RevenueTimingManipulation,
441    /// Overriding quote prices without authorization.
442    QuotePriceOverride,
443}
444
445impl FraudType {
446    /// Returns severity level (1-5).
447    pub fn severity(&self) -> u8 {
448        match self {
449            FraudType::RoundDollarManipulation => 2,
450            FraudType::JustBelowThreshold => 3,
451            FraudType::SelfApproval => 3,
452            FraudType::ExceededApprovalLimit => 3,
453            FraudType::DuplicatePayment => 3,
454            FraudType::FictitiousEntry => 4,
455            FraudType::RevenueManipulation => 5,
456            FraudType::FictitiousVendor => 5,
457            FraudType::ShellCompanyPayment => 5,
458            FraudType::AssetMisappropriation => 5,
459            FraudType::SegregationOfDutiesViolation => 4,
460            FraudType::CollusiveApproval => 5,
461            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
462            FraudType::ImproperRevenueRecognition => 5,
463            FraudType::ImproperPoAllocation => 4,
464            FraudType::VariableConsiderationManipulation => 4,
465            FraudType::ContractModificationMisstatement => 3,
466            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
467            FraudType::LeaseClassificationManipulation => 4,
468            FraudType::OffBalanceSheetLease => 5,
469            FraudType::LeaseLiabilityUnderstatement => 4,
470            FraudType::RouAssetMisstatement => 3,
471            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
472            FraudType::FairValueHierarchyManipulation => 4,
473            FraudType::Level3InputManipulation => 5,
474            FraudType::ValuationTechniqueManipulation => 4,
475            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
476            FraudType::DelayedImpairment => 4,
477            FraudType::ImpairmentTestAvoidance => 4,
478            FraudType::CashFlowProjectionManipulation => 5,
479            FraudType::ImproperImpairmentReversal => 3,
480            _ => 4,
481        }
482    }
483}
484
485/// Error types for error detection.
486#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
487pub enum ErrorType {
488    // Data Entry Errors
489    /// Duplicate document entry.
490    DuplicateEntry,
491    /// Reversed debit/credit amounts.
492    ReversedAmount,
493    /// Transposed digits in amount.
494    TransposedDigits,
495    /// Wrong decimal placement.
496    DecimalError,
497    /// Missing required field.
498    MissingField,
499    /// Invalid account code.
500    InvalidAccount,
501
502    // Timing Errors
503    /// Posted to wrong period.
504    WrongPeriod,
505    /// Backdated entry.
506    BackdatedEntry,
507    /// Future-dated entry.
508    FutureDatedEntry,
509    /// Cutoff error.
510    CutoffError,
511
512    // Classification Errors
513    /// Wrong account classification.
514    MisclassifiedAccount,
515    /// Wrong cost center.
516    WrongCostCenter,
517    /// Wrong company code.
518    WrongCompanyCode,
519
520    // Calculation Errors
521    /// Unbalanced journal entry.
522    UnbalancedEntry,
523    /// Rounding error.
524    RoundingError,
525    /// Currency conversion error.
526    CurrencyError,
527    /// Tax calculation error.
528    TaxCalculationError,
529
530    // Accounting Standards Errors (Non-Fraudulent)
531    /// Wrong revenue recognition timing (honest mistake).
532    RevenueTimingError,
533    /// Performance obligation allocation error.
534    PoAllocationError,
535    /// Lease classification error (operating vs finance).
536    LeaseClassificationError,
537    /// Lease calculation error (PV, amortization).
538    LeaseCalculationError,
539    /// Fair value measurement error.
540    FairValueError,
541    /// Impairment calculation error.
542    ImpairmentCalculationError,
543    /// Discount rate error.
544    DiscountRateError,
545    /// Framework application error (IFRS vs GAAP).
546    FrameworkApplicationError,
547}
548
549impl ErrorType {
550    /// Returns severity level (1-5).
551    pub fn severity(&self) -> u8 {
552        match self {
553            ErrorType::RoundingError => 1,
554            ErrorType::MissingField => 2,
555            ErrorType::TransposedDigits => 2,
556            ErrorType::DecimalError => 3,
557            ErrorType::DuplicateEntry => 3,
558            ErrorType::ReversedAmount => 3,
559            ErrorType::WrongPeriod => 4,
560            ErrorType::UnbalancedEntry => 5,
561            ErrorType::CurrencyError => 4,
562            // Accounting Standards Errors
563            ErrorType::RevenueTimingError => 4,
564            ErrorType::PoAllocationError => 3,
565            ErrorType::LeaseClassificationError => 3,
566            ErrorType::LeaseCalculationError => 3,
567            ErrorType::FairValueError => 4,
568            ErrorType::ImpairmentCalculationError => 4,
569            ErrorType::DiscountRateError => 3,
570            ErrorType::FrameworkApplicationError => 4,
571            _ => 3,
572        }
573    }
574}
575
576/// Process issue types.
577#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
578pub enum ProcessIssueType {
579    // Approval Issues
580    /// Approval skipped entirely.
581    SkippedApproval,
582    /// Late approval (after posting).
583    LateApproval,
584    /// Missing supporting documentation.
585    MissingDocumentation,
586    /// Incomplete approval chain.
587    IncompleteApprovalChain,
588
589    // Timing Issues
590    /// Late posting.
591    LatePosting,
592    /// Posting outside business hours.
593    AfterHoursPosting,
594    /// Weekend/holiday posting.
595    WeekendPosting,
596    /// Rushed period-end posting.
597    RushedPeriodEnd,
598
599    // Control Issues
600    /// Manual override of system control.
601    ManualOverride,
602    /// Unusual user access pattern.
603    UnusualAccess,
604    /// System bypass.
605    SystemBypass,
606    /// Batch processing anomaly.
607    BatchAnomaly,
608
609    // Documentation Issues
610    /// Vague or missing description.
611    VagueDescription,
612    /// Changed after posting.
613    PostFactoChange,
614    /// Incomplete audit trail.
615    IncompleteAuditTrail,
616
617    // Sourcing/Procurement Issues (S2C)
618    /// Purchasing outside of contracts (maverick spend).
619    MaverickSpend,
620    /// Purchasing against an expired contract.
621    ExpiredContractPurchase,
622    /// Overriding contracted price without authorization.
623    ContractPriceOverride,
624    /// Award given with only a single bid received.
625    SingleBidAward,
626    /// Bypassing supplier qualification requirements.
627    QualificationBypass,
628
629    // O2C Issues
630    /// Converting an expired quote to a sales order.
631    ExpiredQuoteConversion,
632}
633
634impl ProcessIssueType {
635    /// Returns severity level (1-5).
636    pub fn severity(&self) -> u8 {
637        match self {
638            ProcessIssueType::VagueDescription => 1,
639            ProcessIssueType::LatePosting => 2,
640            ProcessIssueType::AfterHoursPosting => 2,
641            ProcessIssueType::WeekendPosting => 2,
642            ProcessIssueType::SkippedApproval => 4,
643            ProcessIssueType::ManualOverride => 4,
644            ProcessIssueType::SystemBypass => 5,
645            ProcessIssueType::IncompleteAuditTrail => 4,
646            _ => 3,
647        }
648    }
649}
650
651/// Statistical anomaly types.
652#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
653pub enum StatisticalAnomalyType {
654    // Amount Anomalies
655    /// Amount significantly above normal.
656    UnusuallyHighAmount,
657    /// Amount significantly below normal.
658    UnusuallyLowAmount,
659    /// Violates Benford's Law distribution.
660    BenfordViolation,
661    /// Exact duplicate amount (suspicious).
662    ExactDuplicateAmount,
663    /// Repeating pattern in amounts.
664    RepeatingAmount,
665
666    // Frequency Anomalies
667    /// Unusual transaction frequency.
668    UnusualFrequency,
669    /// Burst of transactions.
670    TransactionBurst,
671    /// Unusual time of day.
672    UnusualTiming,
673
674    // Trend Anomalies
675    /// Break in historical trend.
676    TrendBreak,
677    /// Sudden level shift.
678    LevelShift,
679    /// Seasonal pattern violation.
680    SeasonalAnomaly,
681
682    // Distribution Anomalies
683    /// Outlier in distribution.
684    StatisticalOutlier,
685    /// Change in variance.
686    VarianceChange,
687    /// Distribution shift.
688    DistributionShift,
689
690    // Sourcing/Contract Anomalies
691    /// Pattern of SLA breaches from a vendor.
692    SlaBreachPattern,
693    /// Contract with zero utilization.
694    UnusedContract,
695
696    // HR/Payroll Anomalies
697    /// Anomalous overtime patterns.
698    OvertimeAnomaly,
699}
700
701impl StatisticalAnomalyType {
702    /// Returns severity level (1-5).
703    pub fn severity(&self) -> u8 {
704        match self {
705            StatisticalAnomalyType::UnusualTiming => 1,
706            StatisticalAnomalyType::UnusualFrequency => 2,
707            StatisticalAnomalyType::BenfordViolation => 2,
708            StatisticalAnomalyType::UnusuallyHighAmount => 3,
709            StatisticalAnomalyType::TrendBreak => 3,
710            StatisticalAnomalyType::TransactionBurst => 4,
711            StatisticalAnomalyType::ExactDuplicateAmount => 3,
712            _ => 3,
713        }
714    }
715}
716
717/// Relational/graph anomaly types.
718#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
719pub enum RelationalAnomalyType {
720    // Transaction Pattern Anomalies
721    /// Circular transaction pattern.
722    CircularTransaction,
723    /// Unusual account combination.
724    UnusualAccountPair,
725    /// New trading partner.
726    NewCounterparty,
727    /// Dormant account suddenly active.
728    DormantAccountActivity,
729
730    // Network Anomalies
731    /// Unusual network centrality.
732    CentralityAnomaly,
733    /// Isolated transaction cluster.
734    IsolatedCluster,
735    /// Bridge node anomaly.
736    BridgeNodeAnomaly,
737    /// Community structure change.
738    CommunityAnomaly,
739
740    // Relationship Anomalies
741    /// Missing expected relationship.
742    MissingRelationship,
743    /// Unexpected relationship.
744    UnexpectedRelationship,
745    /// Relationship strength change.
746    RelationshipStrengthChange,
747
748    // Intercompany Anomalies
749    /// Unmatched intercompany transaction.
750    UnmatchedIntercompany,
751    /// Circular intercompany flow.
752    CircularIntercompany,
753    /// Transfer pricing anomaly.
754    TransferPricingAnomaly,
755}
756
757impl RelationalAnomalyType {
758    /// Returns severity level (1-5).
759    pub fn severity(&self) -> u8 {
760        match self {
761            RelationalAnomalyType::NewCounterparty => 1,
762            RelationalAnomalyType::DormantAccountActivity => 2,
763            RelationalAnomalyType::UnusualAccountPair => 2,
764            RelationalAnomalyType::CircularTransaction => 4,
765            RelationalAnomalyType::CircularIntercompany => 4,
766            RelationalAnomalyType::TransferPricingAnomaly => 4,
767            RelationalAnomalyType::UnmatchedIntercompany => 3,
768            _ => 3,
769        }
770    }
771}
772
773/// A labeled anomaly for supervised learning.
774#[derive(Debug, Clone, Serialize, Deserialize)]
775pub struct LabeledAnomaly {
776    /// Unique anomaly identifier.
777    pub anomaly_id: String,
778    /// Type of anomaly.
779    pub anomaly_type: AnomalyType,
780    /// Document or entity that contains the anomaly.
781    pub document_id: String,
782    /// Document type (JE, PO, Invoice, etc.).
783    pub document_type: String,
784    /// Company code.
785    pub company_code: String,
786    /// Date the anomaly occurred.
787    pub anomaly_date: NaiveDate,
788    /// Timestamp when detected/injected.
789    pub detection_timestamp: NaiveDateTime,
790    /// Confidence score (0.0 - 1.0) for injected anomalies.
791    pub confidence: f64,
792    /// Severity (1-5).
793    pub severity: u8,
794    /// Description of the anomaly.
795    pub description: String,
796    /// Related entities (user IDs, account codes, etc.).
797    pub related_entities: Vec<String>,
798    /// Monetary impact if applicable.
799    pub monetary_impact: Option<Decimal>,
800    /// Additional metadata.
801    pub metadata: HashMap<String, String>,
802    /// Whether this was injected (true) or naturally occurring (false).
803    pub is_injected: bool,
804    /// Injection strategy used (if injected) - legacy string field.
805    pub injection_strategy: Option<String>,
806    /// Cluster ID if part of an anomaly cluster.
807    pub cluster_id: Option<String>,
808
809    // ========================================
810    // PROVENANCE TRACKING FIELDS (Phase 1.2)
811    // ========================================
812    /// Hash of the original document before modification.
813    /// Enables tracking what the document looked like pre-injection.
814    #[serde(default, skip_serializing_if = "Option::is_none")]
815    pub original_document_hash: Option<String>,
816
817    /// Causal reason explaining why this anomaly was injected.
818    /// Provides "why" tracking for each anomaly.
819    #[serde(default, skip_serializing_if = "Option::is_none")]
820    pub causal_reason: Option<AnomalyCausalReason>,
821
822    /// Structured injection strategy with parameters.
823    /// More detailed than the legacy string-based injection_strategy field.
824    #[serde(default, skip_serializing_if = "Option::is_none")]
825    pub structured_strategy: Option<InjectionStrategy>,
826
827    /// Parent anomaly ID if this was derived from another anomaly.
828    /// Enables anomaly transformation chains.
829    #[serde(default, skip_serializing_if = "Option::is_none")]
830    pub parent_anomaly_id: Option<String>,
831
832    /// Child anomaly IDs that were derived from this anomaly.
833    #[serde(default, skip_serializing_if = "Vec::is_empty")]
834    pub child_anomaly_ids: Vec<String>,
835
836    /// Scenario ID if this anomaly is part of a multi-step scenario.
837    #[serde(default, skip_serializing_if = "Option::is_none")]
838    pub scenario_id: Option<String>,
839
840    /// Generation run ID that produced this anomaly.
841    /// Enables tracing anomalies back to their generation run.
842    #[serde(default, skip_serializing_if = "Option::is_none")]
843    pub run_id: Option<String>,
844
845    /// Seed used for RNG during generation.
846    /// Enables reproducibility.
847    #[serde(default, skip_serializing_if = "Option::is_none")]
848    pub generation_seed: Option<u64>,
849}
850
851impl LabeledAnomaly {
852    /// Creates a new labeled anomaly.
853    pub fn new(
854        anomaly_id: String,
855        anomaly_type: AnomalyType,
856        document_id: String,
857        document_type: String,
858        company_code: String,
859        anomaly_date: NaiveDate,
860    ) -> Self {
861        let severity = anomaly_type.severity();
862        let description = format!(
863            "{} - {} in document {}",
864            anomaly_type.category(),
865            anomaly_type.type_name(),
866            document_id
867        );
868
869        Self {
870            anomaly_id,
871            anomaly_type,
872            document_id,
873            document_type,
874            company_code,
875            anomaly_date,
876            detection_timestamp: chrono::Local::now().naive_local(),
877            confidence: 1.0,
878            severity,
879            description,
880            related_entities: Vec::new(),
881            monetary_impact: None,
882            metadata: HashMap::new(),
883            is_injected: true,
884            injection_strategy: None,
885            cluster_id: None,
886            // Provenance fields
887            original_document_hash: None,
888            causal_reason: None,
889            structured_strategy: None,
890            parent_anomaly_id: None,
891            child_anomaly_ids: Vec::new(),
892            scenario_id: None,
893            run_id: None,
894            generation_seed: None,
895        }
896    }
897
898    /// Sets the description.
899    pub fn with_description(mut self, description: &str) -> Self {
900        self.description = description.to_string();
901        self
902    }
903
904    /// Sets the monetary impact.
905    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
906        self.monetary_impact = Some(impact);
907        self
908    }
909
910    /// Adds a related entity.
911    pub fn with_related_entity(mut self, entity: &str) -> Self {
912        self.related_entities.push(entity.to_string());
913        self
914    }
915
916    /// Adds metadata.
917    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
918        self.metadata.insert(key.to_string(), value.to_string());
919        self
920    }
921
922    /// Sets the injection strategy (legacy string).
923    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
924        self.injection_strategy = Some(strategy.to_string());
925        self
926    }
927
928    /// Sets the cluster ID.
929    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
930        self.cluster_id = Some(cluster_id.to_string());
931        self
932    }
933
934    // ========================================
935    // PROVENANCE BUILDER METHODS (Phase 1.2)
936    // ========================================
937
938    /// Sets the original document hash for provenance tracking.
939    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
940        self.original_document_hash = Some(hash.to_string());
941        self
942    }
943
944    /// Sets the causal reason for this anomaly.
945    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
946        self.causal_reason = Some(reason);
947        self
948    }
949
950    /// Sets the structured injection strategy.
951    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
952        // Also set the legacy string field for backward compatibility
953        self.injection_strategy = Some(strategy.strategy_type().to_string());
954        self.structured_strategy = Some(strategy);
955        self
956    }
957
958    /// Sets the parent anomaly ID (for anomaly derivation chains).
959    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
960        self.parent_anomaly_id = Some(parent_id.to_string());
961        self
962    }
963
964    /// Adds a child anomaly ID.
965    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
966        self.child_anomaly_ids.push(child_id.to_string());
967        self
968    }
969
970    /// Sets the scenario ID for multi-step scenario tracking.
971    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
972        self.scenario_id = Some(scenario_id.to_string());
973        self
974    }
975
976    /// Sets the generation run ID.
977    pub fn with_run_id(mut self, run_id: &str) -> Self {
978        self.run_id = Some(run_id.to_string());
979        self
980    }
981
982    /// Sets the generation seed for reproducibility.
983    pub fn with_generation_seed(mut self, seed: u64) -> Self {
984        self.generation_seed = Some(seed);
985        self
986    }
987
988    /// Sets multiple provenance fields at once for convenience.
989    pub fn with_provenance(
990        mut self,
991        run_id: Option<&str>,
992        seed: Option<u64>,
993        causal_reason: Option<AnomalyCausalReason>,
994    ) -> Self {
995        if let Some(id) = run_id {
996            self.run_id = Some(id.to_string());
997        }
998        self.generation_seed = seed;
999        self.causal_reason = causal_reason;
1000        self
1001    }
1002
1003    /// Converts to a feature vector for ML.
1004    ///
1005    /// Returns a vector of 15 features:
1006    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1007    /// - 1 feature: Severity (normalized 0-1)
1008    /// - 1 feature: Confidence
1009    /// - 1 feature: Has monetary impact (0/1)
1010    /// - 1 feature: Monetary impact (log-scaled)
1011    /// - 1 feature: Is intentional (0/1)
1012    /// - 1 feature: Number of related entities
1013    /// - 1 feature: Is part of cluster (0/1)
1014    /// - 1 feature: Is part of scenario (0/1)
1015    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1016    pub fn to_features(&self) -> Vec<f64> {
1017        let mut features = Vec::new();
1018
1019        // Category one-hot encoding
1020        let categories = [
1021            "Fraud",
1022            "Error",
1023            "ProcessIssue",
1024            "Statistical",
1025            "Relational",
1026            "Custom",
1027        ];
1028        for cat in &categories {
1029            features.push(if self.anomaly_type.category() == *cat {
1030                1.0
1031            } else {
1032                0.0
1033            });
1034        }
1035
1036        // Severity (normalized)
1037        features.push(self.severity as f64 / 5.0);
1038
1039        // Confidence
1040        features.push(self.confidence);
1041
1042        // Has monetary impact
1043        features.push(if self.monetary_impact.is_some() {
1044            1.0
1045        } else {
1046            0.0
1047        });
1048
1049        // Monetary impact (log-scaled)
1050        if let Some(impact) = self.monetary_impact {
1051            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1052            features.push((impact_f64.abs() + 1.0).ln());
1053        } else {
1054            features.push(0.0);
1055        }
1056
1057        // Is intentional
1058        features.push(if self.anomaly_type.is_intentional() {
1059            1.0
1060        } else {
1061            0.0
1062        });
1063
1064        // Number of related entities
1065        features.push(self.related_entities.len() as f64);
1066
1067        // Is part of cluster
1068        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1069
1070        // Provenance features
1071        // Is part of scenario
1072        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1073
1074        // Has parent anomaly (indicates this is a derived anomaly)
1075        features.push(if self.parent_anomaly_id.is_some() {
1076            1.0
1077        } else {
1078            0.0
1079        });
1080
1081        features
1082    }
1083
1084    /// Returns the number of features in the feature vector.
1085    pub fn feature_count() -> usize {
1086        15 // 6 category + 9 other features
1087    }
1088
1089    /// Returns feature names for documentation/ML metadata.
1090    pub fn feature_names() -> Vec<&'static str> {
1091        vec![
1092            "category_fraud",
1093            "category_error",
1094            "category_process_issue",
1095            "category_statistical",
1096            "category_relational",
1097            "category_custom",
1098            "severity_normalized",
1099            "confidence",
1100            "has_monetary_impact",
1101            "monetary_impact_log",
1102            "is_intentional",
1103            "related_entity_count",
1104            "is_clustered",
1105            "is_scenario_part",
1106            "is_derived",
1107        ]
1108    }
1109}
1110
1111/// Summary of anomalies for reporting.
1112#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1113pub struct AnomalySummary {
1114    /// Total anomaly count.
1115    pub total_count: usize,
1116    /// Count by category.
1117    pub by_category: HashMap<String, usize>,
1118    /// Count by specific type.
1119    pub by_type: HashMap<String, usize>,
1120    /// Count by severity.
1121    pub by_severity: HashMap<u8, usize>,
1122    /// Count by company.
1123    pub by_company: HashMap<String, usize>,
1124    /// Total monetary impact.
1125    pub total_monetary_impact: Decimal,
1126    /// Date range.
1127    pub date_range: Option<(NaiveDate, NaiveDate)>,
1128    /// Number of clusters.
1129    pub cluster_count: usize,
1130}
1131
1132impl AnomalySummary {
1133    /// Creates a summary from a list of anomalies.
1134    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1135        let mut summary = AnomalySummary {
1136            total_count: anomalies.len(),
1137            ..Default::default()
1138        };
1139
1140        let mut min_date: Option<NaiveDate> = None;
1141        let mut max_date: Option<NaiveDate> = None;
1142        let mut clusters = std::collections::HashSet::new();
1143
1144        for anomaly in anomalies {
1145            // By category
1146            *summary
1147                .by_category
1148                .entry(anomaly.anomaly_type.category().to_string())
1149                .or_insert(0) += 1;
1150
1151            // By type
1152            *summary
1153                .by_type
1154                .entry(anomaly.anomaly_type.type_name())
1155                .or_insert(0) += 1;
1156
1157            // By severity
1158            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1159
1160            // By company
1161            *summary
1162                .by_company
1163                .entry(anomaly.company_code.clone())
1164                .or_insert(0) += 1;
1165
1166            // Monetary impact
1167            if let Some(impact) = anomaly.monetary_impact {
1168                summary.total_monetary_impact += impact;
1169            }
1170
1171            // Date range
1172            match min_date {
1173                None => min_date = Some(anomaly.anomaly_date),
1174                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1175                _ => {}
1176            }
1177            match max_date {
1178                None => max_date = Some(anomaly.anomaly_date),
1179                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1180                _ => {}
1181            }
1182
1183            // Clusters
1184            if let Some(cluster_id) = &anomaly.cluster_id {
1185                clusters.insert(cluster_id.clone());
1186            }
1187        }
1188
1189        summary.date_range = min_date.zip(max_date);
1190        summary.cluster_count = clusters.len();
1191
1192        summary
1193    }
1194}
1195
1196// ============================================================================
1197// ENHANCED ANOMALY TAXONOMY (FR-003)
1198// ============================================================================
1199
1200/// High-level anomaly category for multi-class classification.
1201///
1202/// These categories provide a more granular classification than the base
1203/// AnomalyType enum, enabling better ML model training and audit reporting.
1204#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1205pub enum AnomalyCategory {
1206    // Vendor-related anomalies
1207    /// Fictitious or shell vendor.
1208    FictitiousVendor,
1209    /// Kickback or collusion with vendor.
1210    VendorKickback,
1211    /// Related party vendor transactions.
1212    RelatedPartyVendor,
1213
1214    // Transaction-related anomalies
1215    /// Duplicate payment or invoice.
1216    DuplicatePayment,
1217    /// Unauthorized transaction.
1218    UnauthorizedTransaction,
1219    /// Structured transactions to avoid thresholds.
1220    StructuredTransaction,
1221
1222    // Pattern-based anomalies
1223    /// Circular flow of funds.
1224    CircularFlow,
1225    /// Behavioral anomaly (deviation from normal patterns).
1226    BehavioralAnomaly,
1227    /// Timing-based anomaly.
1228    TimingAnomaly,
1229
1230    // Journal entry anomalies
1231    /// Manual journal entry anomaly.
1232    JournalAnomaly,
1233    /// Manual override of controls.
1234    ManualOverride,
1235    /// Missing approval in chain.
1236    MissingApproval,
1237
1238    // Statistical anomalies
1239    /// Statistical outlier.
1240    StatisticalOutlier,
1241    /// Distribution anomaly (Benford, etc.).
1242    DistributionAnomaly,
1243
1244    // Custom category
1245    /// User-defined category.
1246    Custom(String),
1247}
1248
1249impl AnomalyCategory {
1250    /// Derives an AnomalyCategory from an AnomalyType.
1251    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1252        match anomaly_type {
1253            AnomalyType::Fraud(fraud_type) => match fraud_type {
1254                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1255                    AnomalyCategory::FictitiousVendor
1256                }
1257                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1258                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1259                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1260                    AnomalyCategory::StructuredTransaction
1261                }
1262                FraudType::SelfApproval
1263                | FraudType::UnauthorizedApproval
1264                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1265                FraudType::TimingAnomaly
1266                | FraudType::RoundDollarManipulation
1267                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1268                _ => AnomalyCategory::BehavioralAnomaly,
1269            },
1270            AnomalyType::Error(error_type) => match error_type {
1271                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1272                ErrorType::WrongPeriod
1273                | ErrorType::BackdatedEntry
1274                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1275                _ => AnomalyCategory::JournalAnomaly,
1276            },
1277            AnomalyType::ProcessIssue(process_type) => match process_type {
1278                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1279                    AnomalyCategory::MissingApproval
1280                }
1281                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1282                    AnomalyCategory::ManualOverride
1283                }
1284                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1285                    AnomalyCategory::TimingAnomaly
1286                }
1287                _ => AnomalyCategory::BehavioralAnomaly,
1288            },
1289            AnomalyType::Statistical(stat_type) => match stat_type {
1290                StatisticalAnomalyType::BenfordViolation
1291                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1292                _ => AnomalyCategory::StatisticalOutlier,
1293            },
1294            AnomalyType::Relational(rel_type) => match rel_type {
1295                RelationalAnomalyType::CircularTransaction
1296                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1297                _ => AnomalyCategory::BehavioralAnomaly,
1298            },
1299            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1300        }
1301    }
1302
1303    /// Returns the category name as a string.
1304    pub fn name(&self) -> &str {
1305        match self {
1306            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1307            AnomalyCategory::VendorKickback => "vendor_kickback",
1308            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1309            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1310            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1311            AnomalyCategory::StructuredTransaction => "structured_transaction",
1312            AnomalyCategory::CircularFlow => "circular_flow",
1313            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1314            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1315            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1316            AnomalyCategory::ManualOverride => "manual_override",
1317            AnomalyCategory::MissingApproval => "missing_approval",
1318            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1319            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1320            AnomalyCategory::Custom(s) => s.as_str(),
1321        }
1322    }
1323
1324    /// Returns the ordinal value for ML encoding.
1325    pub fn ordinal(&self) -> u8 {
1326        match self {
1327            AnomalyCategory::FictitiousVendor => 0,
1328            AnomalyCategory::VendorKickback => 1,
1329            AnomalyCategory::RelatedPartyVendor => 2,
1330            AnomalyCategory::DuplicatePayment => 3,
1331            AnomalyCategory::UnauthorizedTransaction => 4,
1332            AnomalyCategory::StructuredTransaction => 5,
1333            AnomalyCategory::CircularFlow => 6,
1334            AnomalyCategory::BehavioralAnomaly => 7,
1335            AnomalyCategory::TimingAnomaly => 8,
1336            AnomalyCategory::JournalAnomaly => 9,
1337            AnomalyCategory::ManualOverride => 10,
1338            AnomalyCategory::MissingApproval => 11,
1339            AnomalyCategory::StatisticalOutlier => 12,
1340            AnomalyCategory::DistributionAnomaly => 13,
1341            AnomalyCategory::Custom(_) => 14,
1342        }
1343    }
1344
1345    /// Returns the total number of categories (excluding Custom).
1346    pub fn category_count() -> usize {
1347        15 // 14 fixed categories + Custom
1348    }
1349}
1350
1351/// Type of contributing factor for anomaly confidence/severity calculation.
1352#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1353pub enum FactorType {
1354    /// Amount deviation from expected value.
1355    AmountDeviation,
1356    /// Proximity to approval/reporting threshold.
1357    ThresholdProximity,
1358    /// Timing-related anomaly indicator.
1359    TimingAnomaly,
1360    /// Entity risk score contribution.
1361    EntityRisk,
1362    /// Pattern match confidence.
1363    PatternMatch,
1364    /// Frequency deviation from normal.
1365    FrequencyDeviation,
1366    /// Relationship-based anomaly indicator.
1367    RelationshipAnomaly,
1368    /// Control bypass indicator.
1369    ControlBypass,
1370    /// Benford's Law violation.
1371    BenfordViolation,
1372    /// Duplicate indicator.
1373    DuplicateIndicator,
1374    /// Approval chain issue.
1375    ApprovalChainIssue,
1376    /// Documentation gap.
1377    DocumentationGap,
1378    /// Custom factor type.
1379    Custom,
1380}
1381
1382impl FactorType {
1383    /// Returns the factor type name.
1384    pub fn name(&self) -> &'static str {
1385        match self {
1386            FactorType::AmountDeviation => "amount_deviation",
1387            FactorType::ThresholdProximity => "threshold_proximity",
1388            FactorType::TimingAnomaly => "timing_anomaly",
1389            FactorType::EntityRisk => "entity_risk",
1390            FactorType::PatternMatch => "pattern_match",
1391            FactorType::FrequencyDeviation => "frequency_deviation",
1392            FactorType::RelationshipAnomaly => "relationship_anomaly",
1393            FactorType::ControlBypass => "control_bypass",
1394            FactorType::BenfordViolation => "benford_violation",
1395            FactorType::DuplicateIndicator => "duplicate_indicator",
1396            FactorType::ApprovalChainIssue => "approval_chain_issue",
1397            FactorType::DocumentationGap => "documentation_gap",
1398            FactorType::Custom => "custom",
1399        }
1400    }
1401}
1402
1403/// Evidence supporting a contributing factor.
1404#[derive(Debug, Clone, Serialize, Deserialize)]
1405pub struct FactorEvidence {
1406    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
1407    pub source: String,
1408    /// Raw evidence data.
1409    pub data: HashMap<String, String>,
1410}
1411
1412/// A contributing factor to anomaly confidence/severity.
1413#[derive(Debug, Clone, Serialize, Deserialize)]
1414pub struct ContributingFactor {
1415    /// Type of factor.
1416    pub factor_type: FactorType,
1417    /// Observed value.
1418    pub value: f64,
1419    /// Threshold or expected value.
1420    pub threshold: f64,
1421    /// Direction of comparison (true = value > threshold is anomalous).
1422    pub direction_greater: bool,
1423    /// Weight of this factor in overall calculation (0.0 - 1.0).
1424    pub weight: f64,
1425    /// Human-readable description.
1426    pub description: String,
1427    /// Optional supporting evidence.
1428    pub evidence: Option<FactorEvidence>,
1429}
1430
1431impl ContributingFactor {
1432    /// Creates a new contributing factor.
1433    pub fn new(
1434        factor_type: FactorType,
1435        value: f64,
1436        threshold: f64,
1437        direction_greater: bool,
1438        weight: f64,
1439        description: &str,
1440    ) -> Self {
1441        Self {
1442            factor_type,
1443            value,
1444            threshold,
1445            direction_greater,
1446            weight,
1447            description: description.to_string(),
1448            evidence: None,
1449        }
1450    }
1451
1452    /// Adds evidence to the factor.
1453    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1454        self.evidence = Some(FactorEvidence {
1455            source: source.to_string(),
1456            data,
1457        });
1458        self
1459    }
1460
1461    /// Calculates the factor's contribution to anomaly score.
1462    pub fn contribution(&self) -> f64 {
1463        let deviation = if self.direction_greater {
1464            (self.value - self.threshold).max(0.0)
1465        } else {
1466            (self.threshold - self.value).max(0.0)
1467        };
1468
1469        // Normalize by threshold to get relative deviation
1470        let relative_deviation = if self.threshold.abs() > 0.001 {
1471            deviation / self.threshold.abs()
1472        } else {
1473            deviation
1474        };
1475
1476        // Apply weight and cap at 1.0
1477        (relative_deviation * self.weight).min(1.0)
1478    }
1479}
1480
1481/// Enhanced anomaly label with dynamic confidence and severity.
1482#[derive(Debug, Clone, Serialize, Deserialize)]
1483pub struct EnhancedAnomalyLabel {
1484    /// Base labeled anomaly (backward compatible).
1485    pub base: LabeledAnomaly,
1486    /// Enhanced category classification.
1487    pub category: AnomalyCategory,
1488    /// Dynamically calculated confidence (0.0 - 1.0).
1489    pub enhanced_confidence: f64,
1490    /// Contextually calculated severity (0.0 - 1.0).
1491    pub enhanced_severity: f64,
1492    /// Factors contributing to confidence/severity.
1493    pub contributing_factors: Vec<ContributingFactor>,
1494    /// Secondary categories (for multi-label classification).
1495    pub secondary_categories: Vec<AnomalyCategory>,
1496}
1497
1498impl EnhancedAnomalyLabel {
1499    /// Creates an enhanced label from a base labeled anomaly.
1500    pub fn from_base(base: LabeledAnomaly) -> Self {
1501        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1502        let enhanced_confidence = base.confidence;
1503        let enhanced_severity = base.severity as f64 / 5.0;
1504
1505        Self {
1506            base,
1507            category,
1508            enhanced_confidence,
1509            enhanced_severity,
1510            contributing_factors: Vec::new(),
1511            secondary_categories: Vec::new(),
1512        }
1513    }
1514
1515    /// Sets the enhanced confidence.
1516    pub fn with_confidence(mut self, confidence: f64) -> Self {
1517        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1518        self
1519    }
1520
1521    /// Sets the enhanced severity.
1522    pub fn with_severity(mut self, severity: f64) -> Self {
1523        self.enhanced_severity = severity.clamp(0.0, 1.0);
1524        self
1525    }
1526
1527    /// Adds a contributing factor.
1528    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1529        self.contributing_factors.push(factor);
1530        self
1531    }
1532
1533    /// Adds a secondary category.
1534    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1535        if !self.secondary_categories.contains(&category) && category != self.category {
1536            self.secondary_categories.push(category);
1537        }
1538        self
1539    }
1540
1541    /// Converts to an extended feature vector.
1542    ///
1543    /// Returns base features (15) + enhanced features (10) = 25 features.
1544    pub fn to_features(&self) -> Vec<f64> {
1545        let mut features = self.base.to_features();
1546
1547        // Enhanced features
1548        features.push(self.enhanced_confidence);
1549        features.push(self.enhanced_severity);
1550        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1551        features.push(self.secondary_categories.len() as f64);
1552        features.push(self.contributing_factors.len() as f64);
1553
1554        // Max factor weight
1555        let max_weight = self
1556            .contributing_factors
1557            .iter()
1558            .map(|f| f.weight)
1559            .fold(0.0, f64::max);
1560        features.push(max_weight);
1561
1562        // Factor type indicators (binary flags for key factor types)
1563        let has_control_bypass = self
1564            .contributing_factors
1565            .iter()
1566            .any(|f| f.factor_type == FactorType::ControlBypass);
1567        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1568
1569        let has_amount_deviation = self
1570            .contributing_factors
1571            .iter()
1572            .any(|f| f.factor_type == FactorType::AmountDeviation);
1573        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1574
1575        let has_timing = self
1576            .contributing_factors
1577            .iter()
1578            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1579        features.push(if has_timing { 1.0 } else { 0.0 });
1580
1581        let has_pattern_match = self
1582            .contributing_factors
1583            .iter()
1584            .any(|f| f.factor_type == FactorType::PatternMatch);
1585        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1586
1587        features
1588    }
1589
1590    /// Returns the number of features in the enhanced feature vector.
1591    pub fn feature_count() -> usize {
1592        25 // 15 base + 10 enhanced
1593    }
1594
1595    /// Returns feature names for the enhanced feature vector.
1596    pub fn feature_names() -> Vec<&'static str> {
1597        let mut names = LabeledAnomaly::feature_names();
1598        names.extend(vec![
1599            "enhanced_confidence",
1600            "enhanced_severity",
1601            "category_ordinal",
1602            "secondary_category_count",
1603            "contributing_factor_count",
1604            "max_factor_weight",
1605            "has_control_bypass",
1606            "has_amount_deviation",
1607            "has_timing_factor",
1608            "has_pattern_match",
1609        ]);
1610        names
1611    }
1612}
1613
1614// ============================================================================
1615// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1616// ============================================================================
1617
1618/// Severity level classification for anomalies.
1619#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1620pub enum SeverityLevel {
1621    /// Minor issue, low impact.
1622    Low,
1623    /// Moderate issue, noticeable impact.
1624    #[default]
1625    Medium,
1626    /// Significant issue, substantial impact.
1627    High,
1628    /// Critical issue, severe impact requiring immediate attention.
1629    Critical,
1630}
1631
1632impl SeverityLevel {
1633    /// Returns the numeric value (1-4) for the severity level.
1634    pub fn numeric(&self) -> u8 {
1635        match self {
1636            SeverityLevel::Low => 1,
1637            SeverityLevel::Medium => 2,
1638            SeverityLevel::High => 3,
1639            SeverityLevel::Critical => 4,
1640        }
1641    }
1642
1643    /// Creates a severity level from a numeric value.
1644    pub fn from_numeric(value: u8) -> Self {
1645        match value {
1646            1 => SeverityLevel::Low,
1647            2 => SeverityLevel::Medium,
1648            3 => SeverityLevel::High,
1649            _ => SeverityLevel::Critical,
1650        }
1651    }
1652
1653    /// Creates a severity level from a normalized score (0.0-1.0).
1654    pub fn from_score(score: f64) -> Self {
1655        match score {
1656            s if s < 0.25 => SeverityLevel::Low,
1657            s if s < 0.50 => SeverityLevel::Medium,
1658            s if s < 0.75 => SeverityLevel::High,
1659            _ => SeverityLevel::Critical,
1660        }
1661    }
1662
1663    /// Returns a normalized score (0.0-1.0) for this severity level.
1664    pub fn to_score(&self) -> f64 {
1665        match self {
1666            SeverityLevel::Low => 0.125,
1667            SeverityLevel::Medium => 0.375,
1668            SeverityLevel::High => 0.625,
1669            SeverityLevel::Critical => 0.875,
1670        }
1671    }
1672}
1673
1674/// Structured severity scoring for anomalies.
1675#[derive(Debug, Clone, Serialize, Deserialize)]
1676pub struct AnomalySeverity {
1677    /// Severity level classification.
1678    pub level: SeverityLevel,
1679    /// Continuous severity score (0.0-1.0).
1680    pub score: f64,
1681    /// Absolute financial impact amount.
1682    pub financial_impact: Decimal,
1683    /// Whether this exceeds materiality threshold.
1684    pub is_material: bool,
1685    /// Materiality threshold used for determination.
1686    #[serde(default, skip_serializing_if = "Option::is_none")]
1687    pub materiality_threshold: Option<Decimal>,
1688}
1689
1690impl AnomalySeverity {
1691    /// Creates a new severity assessment.
1692    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1693        Self {
1694            level,
1695            score: level.to_score(),
1696            financial_impact,
1697            is_material: false,
1698            materiality_threshold: None,
1699        }
1700    }
1701
1702    /// Creates severity from a score, auto-determining level.
1703    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1704        Self {
1705            level: SeverityLevel::from_score(score),
1706            score: score.clamp(0.0, 1.0),
1707            financial_impact,
1708            is_material: false,
1709            materiality_threshold: None,
1710        }
1711    }
1712
1713    /// Sets the materiality assessment.
1714    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1715        self.materiality_threshold = Some(threshold);
1716        self.is_material = self.financial_impact.abs() >= threshold;
1717        self
1718    }
1719}
1720
1721impl Default for AnomalySeverity {
1722    fn default() -> Self {
1723        Self {
1724            level: SeverityLevel::Medium,
1725            score: 0.5,
1726            financial_impact: Decimal::ZERO,
1727            is_material: false,
1728            materiality_threshold: None,
1729        }
1730    }
1731}
1732
1733/// Detection difficulty classification for anomalies.
1734///
1735/// Categorizes how difficult an anomaly is to detect, which is useful
1736/// for ML model benchmarking and audit procedure selection.
1737///
1738/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
1739/// is used for drift event classification and has different variants.
1740#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1741pub enum AnomalyDetectionDifficulty {
1742    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
1743    Trivial,
1744    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
1745    Easy,
1746    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
1747    #[default]
1748    Moderate,
1749    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
1750    Hard,
1751    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
1752    Expert,
1753}
1754
1755impl AnomalyDetectionDifficulty {
1756    /// Returns the expected detection rate for this difficulty level.
1757    pub fn expected_detection_rate(&self) -> f64 {
1758        match self {
1759            AnomalyDetectionDifficulty::Trivial => 0.99,
1760            AnomalyDetectionDifficulty::Easy => 0.90,
1761            AnomalyDetectionDifficulty::Moderate => 0.70,
1762            AnomalyDetectionDifficulty::Hard => 0.40,
1763            AnomalyDetectionDifficulty::Expert => 0.15,
1764        }
1765    }
1766
1767    /// Returns a numeric difficulty score (0.0-1.0).
1768    pub fn difficulty_score(&self) -> f64 {
1769        match self {
1770            AnomalyDetectionDifficulty::Trivial => 0.05,
1771            AnomalyDetectionDifficulty::Easy => 0.25,
1772            AnomalyDetectionDifficulty::Moderate => 0.50,
1773            AnomalyDetectionDifficulty::Hard => 0.75,
1774            AnomalyDetectionDifficulty::Expert => 0.95,
1775        }
1776    }
1777
1778    /// Creates a difficulty level from a score (0.0-1.0).
1779    pub fn from_score(score: f64) -> Self {
1780        match score {
1781            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1782            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1783            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1784            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1785            _ => AnomalyDetectionDifficulty::Expert,
1786        }
1787    }
1788
1789    /// Returns the name of this difficulty level.
1790    pub fn name(&self) -> &'static str {
1791        match self {
1792            AnomalyDetectionDifficulty::Trivial => "trivial",
1793            AnomalyDetectionDifficulty::Easy => "easy",
1794            AnomalyDetectionDifficulty::Moderate => "moderate",
1795            AnomalyDetectionDifficulty::Hard => "hard",
1796            AnomalyDetectionDifficulty::Expert => "expert",
1797        }
1798    }
1799}
1800
1801/// Ground truth certainty level for anomaly labels.
1802///
1803/// Indicates how certain we are that the label is correct.
1804#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1805pub enum GroundTruthCertainty {
1806    /// Definitively known (injected anomaly with full provenance).
1807    #[default]
1808    Definite,
1809    /// Highly probable based on strong evidence.
1810    Probable,
1811    /// Possibly an anomaly based on indirect evidence.
1812    Possible,
1813}
1814
1815impl GroundTruthCertainty {
1816    /// Returns a certainty score (0.0-1.0).
1817    pub fn certainty_score(&self) -> f64 {
1818        match self {
1819            GroundTruthCertainty::Definite => 1.0,
1820            GroundTruthCertainty::Probable => 0.8,
1821            GroundTruthCertainty::Possible => 0.5,
1822        }
1823    }
1824
1825    /// Returns the name of this certainty level.
1826    pub fn name(&self) -> &'static str {
1827        match self {
1828            GroundTruthCertainty::Definite => "definite",
1829            GroundTruthCertainty::Probable => "probable",
1830            GroundTruthCertainty::Possible => "possible",
1831        }
1832    }
1833}
1834
1835/// Detection method classification.
1836///
1837/// Indicates which detection methods are recommended or effective for an anomaly.
1838#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1839pub enum DetectionMethod {
1840    /// Simple rule-based detection (thresholds, filters).
1841    RuleBased,
1842    /// Statistical analysis (distributions, outlier detection).
1843    Statistical,
1844    /// Machine learning models (classification, anomaly detection).
1845    MachineLearning,
1846    /// Graph-based analysis (network patterns, relationships).
1847    GraphBased,
1848    /// Manual forensic audit procedures.
1849    ForensicAudit,
1850    /// Combination of multiple methods.
1851    Hybrid,
1852}
1853
1854impl DetectionMethod {
1855    /// Returns the name of this detection method.
1856    pub fn name(&self) -> &'static str {
1857        match self {
1858            DetectionMethod::RuleBased => "rule_based",
1859            DetectionMethod::Statistical => "statistical",
1860            DetectionMethod::MachineLearning => "machine_learning",
1861            DetectionMethod::GraphBased => "graph_based",
1862            DetectionMethod::ForensicAudit => "forensic_audit",
1863            DetectionMethod::Hybrid => "hybrid",
1864        }
1865    }
1866
1867    /// Returns a description of this detection method.
1868    pub fn description(&self) -> &'static str {
1869        match self {
1870            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1871            DetectionMethod::Statistical => "Statistical distribution analysis",
1872            DetectionMethod::MachineLearning => "ML classification models",
1873            DetectionMethod::GraphBased => "Network and relationship analysis",
1874            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1875            DetectionMethod::Hybrid => "Combined multi-method approach",
1876        }
1877    }
1878}
1879
1880/// Extended anomaly label with comprehensive multi-dimensional classification.
1881///
1882/// This extends the base `EnhancedAnomalyLabel` with additional fields for
1883/// severity scoring, detection difficulty, recommended methods, and ground truth.
1884#[derive(Debug, Clone, Serialize, Deserialize)]
1885pub struct ExtendedAnomalyLabel {
1886    /// Base labeled anomaly.
1887    pub base: LabeledAnomaly,
1888    /// Enhanced category classification.
1889    pub category: AnomalyCategory,
1890    /// Structured severity assessment.
1891    pub severity: AnomalySeverity,
1892    /// Detection difficulty classification.
1893    pub detection_difficulty: AnomalyDetectionDifficulty,
1894    /// Recommended detection methods for this anomaly.
1895    pub recommended_methods: Vec<DetectionMethod>,
1896    /// Key indicators that should trigger detection.
1897    pub key_indicators: Vec<String>,
1898    /// Ground truth certainty level.
1899    pub ground_truth_certainty: GroundTruthCertainty,
1900    /// Contributing factors to confidence/severity.
1901    pub contributing_factors: Vec<ContributingFactor>,
1902    /// Related entity IDs (vendors, customers, employees, etc.).
1903    pub related_entity_ids: Vec<String>,
1904    /// Secondary categories for multi-label classification.
1905    pub secondary_categories: Vec<AnomalyCategory>,
1906    /// Scheme ID if part of a multi-stage fraud scheme.
1907    #[serde(default, skip_serializing_if = "Option::is_none")]
1908    pub scheme_id: Option<String>,
1909    /// Stage number within a scheme (1-indexed).
1910    #[serde(default, skip_serializing_if = "Option::is_none")]
1911    pub scheme_stage: Option<u32>,
1912    /// Whether this is a near-miss (suspicious but legitimate).
1913    #[serde(default)]
1914    pub is_near_miss: bool,
1915    /// Explanation if this is a near-miss.
1916    #[serde(default, skip_serializing_if = "Option::is_none")]
1917    pub near_miss_explanation: Option<String>,
1918}
1919
1920impl ExtendedAnomalyLabel {
1921    /// Creates an extended label from a base labeled anomaly.
1922    pub fn from_base(base: LabeledAnomaly) -> Self {
1923        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1924        let severity = AnomalySeverity {
1925            level: SeverityLevel::from_numeric(base.severity),
1926            score: base.severity as f64 / 5.0,
1927            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1928            is_material: false,
1929            materiality_threshold: None,
1930        };
1931
1932        Self {
1933            base,
1934            category,
1935            severity,
1936            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1937            recommended_methods: vec![DetectionMethod::RuleBased],
1938            key_indicators: Vec::new(),
1939            ground_truth_certainty: GroundTruthCertainty::Definite,
1940            contributing_factors: Vec::new(),
1941            related_entity_ids: Vec::new(),
1942            secondary_categories: Vec::new(),
1943            scheme_id: None,
1944            scheme_stage: None,
1945            is_near_miss: false,
1946            near_miss_explanation: None,
1947        }
1948    }
1949
1950    /// Sets the severity assessment.
1951    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1952        self.severity = severity;
1953        self
1954    }
1955
1956    /// Sets the detection difficulty.
1957    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1958        self.detection_difficulty = difficulty;
1959        self
1960    }
1961
1962    /// Adds a recommended detection method.
1963    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1964        if !self.recommended_methods.contains(&method) {
1965            self.recommended_methods.push(method);
1966        }
1967        self
1968    }
1969
1970    /// Sets the recommended detection methods.
1971    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
1972        self.recommended_methods = methods;
1973        self
1974    }
1975
1976    /// Adds a key indicator.
1977    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
1978        self.key_indicators.push(indicator.into());
1979        self
1980    }
1981
1982    /// Sets the ground truth certainty.
1983    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
1984        self.ground_truth_certainty = certainty;
1985        self
1986    }
1987
1988    /// Adds a contributing factor.
1989    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1990        self.contributing_factors.push(factor);
1991        self
1992    }
1993
1994    /// Adds a related entity ID.
1995    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
1996        self.related_entity_ids.push(entity_id.into());
1997        self
1998    }
1999
2000    /// Adds a secondary category.
2001    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2002        if category != self.category && !self.secondary_categories.contains(&category) {
2003            self.secondary_categories.push(category);
2004        }
2005        self
2006    }
2007
2008    /// Sets scheme information.
2009    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2010        self.scheme_id = Some(scheme_id.into());
2011        self.scheme_stage = Some(stage);
2012        self
2013    }
2014
2015    /// Marks this as a near-miss with explanation.
2016    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2017        self.is_near_miss = true;
2018        self.near_miss_explanation = Some(explanation.into());
2019        self
2020    }
2021
2022    /// Converts to an extended feature vector for ML.
2023    ///
2024    /// Returns base features (15) + extended features (15) = 30 features.
2025    pub fn to_features(&self) -> Vec<f64> {
2026        let mut features = self.base.to_features();
2027
2028        // Extended features
2029        features.push(self.severity.score);
2030        features.push(self.severity.level.to_score());
2031        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2032        features.push(self.detection_difficulty.difficulty_score());
2033        features.push(self.detection_difficulty.expected_detection_rate());
2034        features.push(self.ground_truth_certainty.certainty_score());
2035        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2036        features.push(self.secondary_categories.len() as f64);
2037        features.push(self.contributing_factors.len() as f64);
2038        features.push(self.key_indicators.len() as f64);
2039        features.push(self.recommended_methods.len() as f64);
2040        features.push(self.related_entity_ids.len() as f64);
2041        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2042        features.push(self.scheme_stage.unwrap_or(0) as f64);
2043        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2044
2045        features
2046    }
2047
2048    /// Returns the number of features in the extended feature vector.
2049    pub fn feature_count() -> usize {
2050        30 // 15 base + 15 extended
2051    }
2052
2053    /// Returns feature names for the extended feature vector.
2054    pub fn feature_names() -> Vec<&'static str> {
2055        let mut names = LabeledAnomaly::feature_names();
2056        names.extend(vec![
2057            "severity_score",
2058            "severity_level_score",
2059            "is_material",
2060            "difficulty_score",
2061            "expected_detection_rate",
2062            "ground_truth_certainty",
2063            "category_ordinal",
2064            "secondary_category_count",
2065            "contributing_factor_count",
2066            "key_indicator_count",
2067            "recommended_method_count",
2068            "related_entity_count",
2069            "is_part_of_scheme",
2070            "scheme_stage",
2071            "is_near_miss",
2072        ]);
2073        names
2074    }
2075}
2076
2077// ============================================================================
2078// MULTI-STAGE FRAUD SCHEME TYPES
2079// ============================================================================
2080
2081/// Type of multi-stage fraud scheme.
2082#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2083pub enum SchemeType {
2084    /// Gradual embezzlement over time.
2085    GradualEmbezzlement,
2086    /// Revenue manipulation across periods.
2087    RevenueManipulation,
2088    /// Vendor kickback scheme.
2089    VendorKickback,
2090    /// Round-tripping funds through multiple entities.
2091    RoundTripping,
2092    /// Ghost employee scheme.
2093    GhostEmployee,
2094    /// Expense reimbursement fraud.
2095    ExpenseReimbursement,
2096    /// Inventory theft scheme.
2097    InventoryTheft,
2098    /// Custom scheme type.
2099    Custom,
2100}
2101
2102impl SchemeType {
2103    /// Returns the name of this scheme type.
2104    pub fn name(&self) -> &'static str {
2105        match self {
2106            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2107            SchemeType::RevenueManipulation => "revenue_manipulation",
2108            SchemeType::VendorKickback => "vendor_kickback",
2109            SchemeType::RoundTripping => "round_tripping",
2110            SchemeType::GhostEmployee => "ghost_employee",
2111            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2112            SchemeType::InventoryTheft => "inventory_theft",
2113            SchemeType::Custom => "custom",
2114        }
2115    }
2116
2117    /// Returns the typical number of stages for this scheme type.
2118    pub fn typical_stages(&self) -> u32 {
2119        match self {
2120            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2121            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2122            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2123            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2124            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2125            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2126            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2127            SchemeType::Custom => 4,
2128        }
2129    }
2130}
2131
2132/// Status of detection for a fraud scheme.
2133#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2134pub enum SchemeDetectionStatus {
2135    /// Scheme is undetected.
2136    #[default]
2137    Undetected,
2138    /// Under investigation but not confirmed.
2139    UnderInvestigation,
2140    /// Partially detected (some transactions flagged).
2141    PartiallyDetected,
2142    /// Fully detected and confirmed.
2143    FullyDetected,
2144}
2145
2146/// Reference to a transaction within a scheme.
2147#[derive(Debug, Clone, Serialize, Deserialize)]
2148pub struct SchemeTransactionRef {
2149    /// Document ID of the transaction.
2150    pub document_id: String,
2151    /// Transaction date.
2152    pub date: chrono::NaiveDate,
2153    /// Transaction amount.
2154    pub amount: Decimal,
2155    /// Stage this transaction belongs to.
2156    pub stage: u32,
2157    /// Anomaly ID if labeled.
2158    #[serde(default, skip_serializing_if = "Option::is_none")]
2159    pub anomaly_id: Option<String>,
2160}
2161
2162/// Concealment technique used in fraud.
2163#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2164pub enum ConcealmentTechnique {
2165    /// Document manipulation or forgery.
2166    DocumentManipulation,
2167    /// Circumventing approval processes.
2168    ApprovalCircumvention,
2169    /// Exploiting timing (period-end, holidays).
2170    TimingExploitation,
2171    /// Transaction splitting to avoid thresholds.
2172    TransactionSplitting,
2173    /// Account misclassification.
2174    AccountMisclassification,
2175    /// Collusion with other employees.
2176    Collusion,
2177    /// Data alteration or deletion.
2178    DataAlteration,
2179    /// Creating false documentation.
2180    FalseDocumentation,
2181}
2182
2183impl ConcealmentTechnique {
2184    /// Returns the difficulty bonus this technique adds.
2185    pub fn difficulty_bonus(&self) -> f64 {
2186        match self {
2187            ConcealmentTechnique::DocumentManipulation => 0.20,
2188            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2189            ConcealmentTechnique::TimingExploitation => 0.10,
2190            ConcealmentTechnique::TransactionSplitting => 0.15,
2191            ConcealmentTechnique::AccountMisclassification => 0.10,
2192            ConcealmentTechnique::Collusion => 0.25,
2193            ConcealmentTechnique::DataAlteration => 0.20,
2194            ConcealmentTechnique::FalseDocumentation => 0.15,
2195        }
2196    }
2197}
2198
2199// ============================================================================
2200// ACFE-ALIGNED FRAUD TAXONOMY
2201// ============================================================================
2202//
2203// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2204// Nations: Occupational Fraud Classification System. This taxonomy provides
2205// ACFE-aligned categories, schemes, and calibration data.
2206
2207/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
2208///
2209/// ACFE Report to the Nations statistics (typical):
2210/// - Asset Misappropriation: 86% of cases, $100k median loss
2211/// - Corruption: 33% of cases, $150k median loss
2212/// - Financial Statement Fraud: 10% of cases, $954k median loss
2213///
2214/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
2215#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2216pub enum AcfeFraudCategory {
2217    /// Theft of organizational assets (cash, inventory, equipment).
2218    /// Most common (86% of cases) but typically lowest median loss ($100k).
2219    #[default]
2220    AssetMisappropriation,
2221    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
2222    /// Medium frequency (33% of cases), medium median loss ($150k).
2223    Corruption,
2224    /// Intentional misstatement of financial statements.
2225    /// Least common (10% of cases) but highest median loss ($954k).
2226    FinancialStatementFraud,
2227}
2228
2229impl AcfeFraudCategory {
2230    /// Returns the name of this category.
2231    pub fn name(&self) -> &'static str {
2232        match self {
2233            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2234            AcfeFraudCategory::Corruption => "corruption",
2235            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2236        }
2237    }
2238
2239    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2240    pub fn typical_occurrence_rate(&self) -> f64 {
2241        match self {
2242            AcfeFraudCategory::AssetMisappropriation => 0.86,
2243            AcfeFraudCategory::Corruption => 0.33,
2244            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2245        }
2246    }
2247
2248    /// Returns the typical median loss amount (from ACFE reports).
2249    pub fn typical_median_loss(&self) -> Decimal {
2250        match self {
2251            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2252            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2253            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2254        }
2255    }
2256
2257    /// Returns the typical detection time in months (from ACFE reports).
2258    pub fn typical_detection_months(&self) -> u32 {
2259        match self {
2260            AcfeFraudCategory::AssetMisappropriation => 12,
2261            AcfeFraudCategory::Corruption => 18,
2262            AcfeFraudCategory::FinancialStatementFraud => 24,
2263        }
2264    }
2265}
2266
2267/// Cash-based fraud schemes under Asset Misappropriation.
2268///
2269/// Organized according to the ACFE Fraud Tree:
2270/// - Theft of Cash on Hand
2271/// - Theft of Cash Receipts
2272/// - Fraudulent Disbursements
2273#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2274pub enum CashFraudScheme {
2275    // ========== Theft of Cash on Hand ==========
2276    /// Stealing cash from cash drawers or safes after it has been recorded.
2277    Larceny,
2278    /// Stealing cash before it is recorded in the books (intercepts receipts).
2279    Skimming,
2280
2281    // ========== Theft of Cash Receipts ==========
2282    /// Skimming from sales transactions before recording.
2283    SalesSkimming,
2284    /// Intercepting customer payments on accounts receivable.
2285    ReceivablesSkimming,
2286    /// Creating false refunds to pocket the difference.
2287    RefundSchemes,
2288
2289    // ========== Fraudulent Disbursements - Billing Schemes ==========
2290    /// Creating fictitious vendors to invoice and pay.
2291    ShellCompany,
2292    /// Manipulating payments to legitimate vendors for personal gain.
2293    NonAccompliceVendor,
2294    /// Using company funds for personal purchases.
2295    PersonalPurchases,
2296
2297    // ========== Fraudulent Disbursements - Payroll Schemes ==========
2298    /// Creating fake employees to collect wages.
2299    GhostEmployee,
2300    /// Falsifying hours worked, sales commissions, or salary rates.
2301    FalsifiedWages,
2302    /// Manipulating commission calculations.
2303    CommissionSchemes,
2304
2305    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
2306    /// Claiming non-business expenses as business expenses.
2307    MischaracterizedExpenses,
2308    /// Inflating legitimate expense amounts.
2309    OverstatedExpenses,
2310    /// Creating completely fictitious expenses.
2311    FictitiousExpenses,
2312
2313    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
2314    /// Forging the signature of an authorized check signer.
2315    ForgedMaker,
2316    /// Intercepting and altering the endorsement on legitimate checks.
2317    ForgedEndorsement,
2318    /// Altering the payee on a legitimate check.
2319    AlteredPayee,
2320    /// Authorized signer writing checks for personal benefit.
2321    AuthorizedMaker,
2322
2323    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
2324    /// Creating false voided transactions.
2325    FalseVoids,
2326    /// Processing fictitious refunds.
2327    FalseRefunds,
2328}
2329
2330impl CashFraudScheme {
2331    /// Returns the ACFE category this scheme belongs to.
2332    pub fn category(&self) -> AcfeFraudCategory {
2333        AcfeFraudCategory::AssetMisappropriation
2334    }
2335
2336    /// Returns the subcategory within the ACFE Fraud Tree.
2337    pub fn subcategory(&self) -> &'static str {
2338        match self {
2339            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2340            CashFraudScheme::SalesSkimming
2341            | CashFraudScheme::ReceivablesSkimming
2342            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2343            CashFraudScheme::ShellCompany
2344            | CashFraudScheme::NonAccompliceVendor
2345            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2346            CashFraudScheme::GhostEmployee
2347            | CashFraudScheme::FalsifiedWages
2348            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2349            CashFraudScheme::MischaracterizedExpenses
2350            | CashFraudScheme::OverstatedExpenses
2351            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2352            CashFraudScheme::ForgedMaker
2353            | CashFraudScheme::ForgedEndorsement
2354            | CashFraudScheme::AlteredPayee
2355            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2356            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2357        }
2358    }
2359
2360    /// Returns the typical severity (1-5) for this scheme.
2361    pub fn severity(&self) -> u8 {
2362        match self {
2363            // Lower severity - often small amounts, easier to detect
2364            CashFraudScheme::FalseVoids
2365            | CashFraudScheme::FalseRefunds
2366            | CashFraudScheme::MischaracterizedExpenses => 3,
2367            // Medium severity
2368            CashFraudScheme::OverstatedExpenses
2369            | CashFraudScheme::Skimming
2370            | CashFraudScheme::Larceny
2371            | CashFraudScheme::PersonalPurchases
2372            | CashFraudScheme::FalsifiedWages => 4,
2373            // Higher severity - larger amounts, harder to detect
2374            CashFraudScheme::ShellCompany
2375            | CashFraudScheme::GhostEmployee
2376            | CashFraudScheme::FictitiousExpenses
2377            | CashFraudScheme::ForgedMaker
2378            | CashFraudScheme::AuthorizedMaker => 5,
2379            _ => 4,
2380        }
2381    }
2382
2383    /// Returns the typical detection difficulty.
2384    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2385        match self {
2386            // Easy to detect with basic controls
2387            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2388                AnomalyDetectionDifficulty::Easy
2389            }
2390            // Moderate - requires reconciliation
2391            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2392                AnomalyDetectionDifficulty::Moderate
2393            }
2394            // Hard - requires sophisticated analysis
2395            CashFraudScheme::Skimming
2396            | CashFraudScheme::ShellCompany
2397            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2398            // Expert level
2399            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2400                AnomalyDetectionDifficulty::Expert
2401            }
2402            _ => AnomalyDetectionDifficulty::Moderate,
2403        }
2404    }
2405
2406    /// Returns all variants for iteration.
2407    pub fn all_variants() -> &'static [CashFraudScheme] {
2408        &[
2409            CashFraudScheme::Larceny,
2410            CashFraudScheme::Skimming,
2411            CashFraudScheme::SalesSkimming,
2412            CashFraudScheme::ReceivablesSkimming,
2413            CashFraudScheme::RefundSchemes,
2414            CashFraudScheme::ShellCompany,
2415            CashFraudScheme::NonAccompliceVendor,
2416            CashFraudScheme::PersonalPurchases,
2417            CashFraudScheme::GhostEmployee,
2418            CashFraudScheme::FalsifiedWages,
2419            CashFraudScheme::CommissionSchemes,
2420            CashFraudScheme::MischaracterizedExpenses,
2421            CashFraudScheme::OverstatedExpenses,
2422            CashFraudScheme::FictitiousExpenses,
2423            CashFraudScheme::ForgedMaker,
2424            CashFraudScheme::ForgedEndorsement,
2425            CashFraudScheme::AlteredPayee,
2426            CashFraudScheme::AuthorizedMaker,
2427            CashFraudScheme::FalseVoids,
2428            CashFraudScheme::FalseRefunds,
2429        ]
2430    }
2431}
2432
2433/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
2434#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2435pub enum AssetFraudScheme {
2436    // ========== Inventory Schemes ==========
2437    /// Misusing or converting inventory for personal benefit.
2438    InventoryMisuse,
2439    /// Stealing physical inventory items.
2440    InventoryTheft,
2441    /// Manipulating purchasing to facilitate theft.
2442    InventoryPurchasingScheme,
2443    /// Manipulating receiving/shipping to steal inventory.
2444    InventoryReceivingScheme,
2445
2446    // ========== Other Asset Schemes ==========
2447    /// Misusing company equipment or vehicles.
2448    EquipmentMisuse,
2449    /// Theft of company equipment, tools, or supplies.
2450    EquipmentTheft,
2451    /// Unauthorized access to or theft of intellectual property.
2452    IntellectualPropertyTheft,
2453    /// Using company time/resources for personal business.
2454    TimeTheft,
2455}
2456
2457impl AssetFraudScheme {
2458    /// Returns the ACFE category this scheme belongs to.
2459    pub fn category(&self) -> AcfeFraudCategory {
2460        AcfeFraudCategory::AssetMisappropriation
2461    }
2462
2463    /// Returns the subcategory within the ACFE Fraud Tree.
2464    pub fn subcategory(&self) -> &'static str {
2465        match self {
2466            AssetFraudScheme::InventoryMisuse
2467            | AssetFraudScheme::InventoryTheft
2468            | AssetFraudScheme::InventoryPurchasingScheme
2469            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2470            _ => "other_assets",
2471        }
2472    }
2473
2474    /// Returns the typical severity (1-5) for this scheme.
2475    pub fn severity(&self) -> u8 {
2476        match self {
2477            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2478            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2479            AssetFraudScheme::InventoryTheft
2480            | AssetFraudScheme::InventoryPurchasingScheme
2481            | AssetFraudScheme::InventoryReceivingScheme => 4,
2482            AssetFraudScheme::IntellectualPropertyTheft => 5,
2483        }
2484    }
2485}
2486
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// Variants are organised into four groups: conflicts of interest, bribery,
/// illegal gratuities, and economic extortion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2521
2522impl CorruptionScheme {
2523    /// Returns the ACFE category this scheme belongs to.
2524    pub fn category(&self) -> AcfeFraudCategory {
2525        AcfeFraudCategory::Corruption
2526    }
2527
2528    /// Returns the subcategory within the ACFE Fraud Tree.
2529    pub fn subcategory(&self) -> &'static str {
2530        match self {
2531            CorruptionScheme::PurchasingConflict
2532            | CorruptionScheme::SalesConflict
2533            | CorruptionScheme::OutsideBusinessInterest
2534            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2535            CorruptionScheme::InvoiceKickback
2536            | CorruptionScheme::BidRigging
2537            | CorruptionScheme::CashBribery
2538            | CorruptionScheme::PublicOfficial => "bribery",
2539            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2540            CorruptionScheme::EconomicExtortion => "economic_extortion",
2541        }
2542    }
2543
2544    /// Returns the typical severity (1-5) for this scheme.
2545    pub fn severity(&self) -> u8 {
2546        match self {
2547            // Lower severity conflicts of interest
2548            CorruptionScheme::NepotismConflict => 3,
2549            // Medium severity
2550            CorruptionScheme::PurchasingConflict
2551            | CorruptionScheme::SalesConflict
2552            | CorruptionScheme::OutsideBusinessInterest
2553            | CorruptionScheme::IllegalGratuity => 4,
2554            // High severity - active corruption
2555            CorruptionScheme::InvoiceKickback
2556            | CorruptionScheme::BidRigging
2557            | CorruptionScheme::CashBribery
2558            | CorruptionScheme::EconomicExtortion => 5,
2559            // Highest severity - involves public officials
2560            CorruptionScheme::PublicOfficial => 5,
2561        }
2562    }
2563
2564    /// Returns the typical detection difficulty.
2565    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2566        match self {
2567            // Easier to detect with proper disclosure requirements
2568            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2569                AnomalyDetectionDifficulty::Moderate
2570            }
2571            // Hard - requires transaction pattern analysis
2572            CorruptionScheme::PurchasingConflict
2573            | CorruptionScheme::SalesConflict
2574            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2575            // Expert level - deliberate concealment
2576            CorruptionScheme::InvoiceKickback
2577            | CorruptionScheme::CashBribery
2578            | CorruptionScheme::PublicOfficial
2579            | CorruptionScheme::IllegalGratuity
2580            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2581        }
2582    }
2583
2584    /// Returns all variants for iteration.
2585    pub fn all_variants() -> &'static [CorruptionScheme] {
2586        &[
2587            CorruptionScheme::PurchasingConflict,
2588            CorruptionScheme::SalesConflict,
2589            CorruptionScheme::OutsideBusinessInterest,
2590            CorruptionScheme::NepotismConflict,
2591            CorruptionScheme::InvoiceKickback,
2592            CorruptionScheme::BidRigging,
2593            CorruptionScheme::CashBribery,
2594            CorruptionScheme::PublicOfficial,
2595            CorruptionScheme::IllegalGratuity,
2596            CorruptionScheme::EconomicExtortion,
2597        ]
2598    }
2599}
2600
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// Variants are split into an overstatement group (listed first) and an
/// understatement group (the last four variants).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2637
2638impl FinancialStatementScheme {
2639    /// Returns the ACFE category this scheme belongs to.
2640    pub fn category(&self) -> AcfeFraudCategory {
2641        AcfeFraudCategory::FinancialStatementFraud
2642    }
2643
2644    /// Returns the subcategory within the ACFE Fraud Tree.
2645    pub fn subcategory(&self) -> &'static str {
2646        match self {
2647            FinancialStatementScheme::UnderstatedRevenues
2648            | FinancialStatementScheme::OverstatedExpenses
2649            | FinancialStatementScheme::OverstatedLiabilities
2650            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2651            _ => "overstatement",
2652        }
2653    }
2654
2655    /// Returns the typical severity (1-5) for this scheme.
2656    pub fn severity(&self) -> u8 {
2657        // All financial statement fraud is high severity
2658        5
2659    }
2660
2661    /// Returns the typical detection difficulty.
2662    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2663        match self {
2664            // Easier to detect with good analytics
2665            FinancialStatementScheme::ChannelStuffing
2666            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2667            // Hard - requires deep analysis
2668            FinancialStatementScheme::PrematureRevenue
2669            | FinancialStatementScheme::ImproperCapitalization
2670            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2671            // Expert level
2672            FinancialStatementScheme::FictitiousRevenues
2673            | FinancialStatementScheme::ConcealedLiabilities
2674            | FinancialStatementScheme::ImproperAssetValuations
2675            | FinancialStatementScheme::ImproperDisclosures
2676            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2677            _ => AnomalyDetectionDifficulty::Hard,
2678        }
2679    }
2680
2681    /// Returns all variants for iteration.
2682    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2683        &[
2684            FinancialStatementScheme::PrematureRevenue,
2685            FinancialStatementScheme::DelayedExpenses,
2686            FinancialStatementScheme::FictitiousRevenues,
2687            FinancialStatementScheme::ConcealedLiabilities,
2688            FinancialStatementScheme::ImproperAssetValuations,
2689            FinancialStatementScheme::ImproperDisclosures,
2690            FinancialStatementScheme::ChannelStuffing,
2691            FinancialStatementScheme::BillAndHold,
2692            FinancialStatementScheme::ImproperCapitalization,
2693            FinancialStatementScheme::UnderstatedRevenues,
2694            FinancialStatementScheme::OverstatedExpenses,
2695            FinancialStatementScheme::OverstatedLiabilities,
2696            FinancialStatementScheme::ImproperAssetWritedowns,
2697        ]
2698    }
2699}
2700
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the per-family scheme enums so that callers can
/// handle any scheme through a single type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2713
2714impl AcfeScheme {
2715    /// Returns the ACFE category this scheme belongs to.
2716    pub fn category(&self) -> AcfeFraudCategory {
2717        match self {
2718            AcfeScheme::Cash(s) => s.category(),
2719            AcfeScheme::Asset(s) => s.category(),
2720            AcfeScheme::Corruption(s) => s.category(),
2721            AcfeScheme::FinancialStatement(s) => s.category(),
2722        }
2723    }
2724
2725    /// Returns the severity (1-5) for this scheme.
2726    pub fn severity(&self) -> u8 {
2727        match self {
2728            AcfeScheme::Cash(s) => s.severity(),
2729            AcfeScheme::Asset(s) => s.severity(),
2730            AcfeScheme::Corruption(s) => s.severity(),
2731            AcfeScheme::FinancialStatement(s) => s.severity(),
2732        }
2733    }
2734
2735    /// Returns the detection difficulty for this scheme.
2736    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2737        match self {
2738            AcfeScheme::Cash(s) => s.detection_difficulty(),
2739            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2740            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2741            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2742        }
2743    }
2744}
2745
/// How a fraud was detected (from ACFE statistics).
///
/// The share of cases attributed to each method is available through
/// [`AcfeDetectionMethod::typical_detection_rate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2774
2775impl AcfeDetectionMethod {
2776    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2777    pub fn typical_detection_rate(&self) -> f64 {
2778        match self {
2779            AcfeDetectionMethod::Tip => 0.42,
2780            AcfeDetectionMethod::InternalAudit => 0.16,
2781            AcfeDetectionMethod::ManagementReview => 0.12,
2782            AcfeDetectionMethod::ExternalAudit => 0.04,
2783            AcfeDetectionMethod::AccountReconciliation => 0.05,
2784            AcfeDetectionMethod::DocumentExamination => 0.04,
2785            AcfeDetectionMethod::ByAccident => 0.06,
2786            AcfeDetectionMethod::ItControls => 0.03,
2787            AcfeDetectionMethod::Surveillance => 0.02,
2788            AcfeDetectionMethod::Confession => 0.02,
2789            AcfeDetectionMethod::LawEnforcement => 0.01,
2790            AcfeDetectionMethod::Other => 0.03,
2791        }
2792    }
2793
2794    /// Returns all variants for iteration.
2795    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2796        &[
2797            AcfeDetectionMethod::Tip,
2798            AcfeDetectionMethod::InternalAudit,
2799            AcfeDetectionMethod::ManagementReview,
2800            AcfeDetectionMethod::ExternalAudit,
2801            AcfeDetectionMethod::AccountReconciliation,
2802            AcfeDetectionMethod::DocumentExamination,
2803            AcfeDetectionMethod::ByAccident,
2804            AcfeDetectionMethod::ItControls,
2805            AcfeDetectionMethod::Surveillance,
2806            AcfeDetectionMethod::Confession,
2807            AcfeDetectionMethod::LawEnforcement,
2808            AcfeDetectionMethod::Other,
2809        ]
2810    }
2811}
2812
/// Department/position of perpetrator (from ACFE statistics).
///
/// Per-department occurrence rates and median losses are exposed by the
/// `typical_occurrence_rate` and `typical_median_loss` methods.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
2841
2842impl PerpetratorDepartment {
2843    /// Returns the typical percentage of frauds by department (from ACFE reports).
2844    pub fn typical_occurrence_rate(&self) -> f64 {
2845        match self {
2846            PerpetratorDepartment::Accounting => 0.21,
2847            PerpetratorDepartment::Operations => 0.17,
2848            PerpetratorDepartment::Executive => 0.12,
2849            PerpetratorDepartment::Sales => 0.11,
2850            PerpetratorDepartment::CustomerService => 0.07,
2851            PerpetratorDepartment::Purchasing => 0.06,
2852            PerpetratorDepartment::It => 0.05,
2853            PerpetratorDepartment::HumanResources => 0.04,
2854            PerpetratorDepartment::Administrative => 0.04,
2855            PerpetratorDepartment::Warehouse => 0.03,
2856            PerpetratorDepartment::BoardOfDirectors => 0.02,
2857            PerpetratorDepartment::Other => 0.08,
2858        }
2859    }
2860
2861    /// Returns the typical median loss by perpetrator department.
2862    pub fn typical_median_loss(&self) -> Decimal {
2863        match self {
2864            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2865            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2866            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2867            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2868            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2869            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2870            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2871            _ => Decimal::new(80_000, 0),
2872        }
2873    }
2874}
2875
/// Perpetrator position level (from ACFE statistics).
///
/// Three tiers are distinguished: rank-and-file employee, manager, and
/// owner/executive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
2886
2887impl PerpetratorLevel {
2888    /// Returns the typical percentage of frauds by position level.
2889    pub fn typical_occurrence_rate(&self) -> f64 {
2890        match self {
2891            PerpetratorLevel::Employee => 0.42,
2892            PerpetratorLevel::Manager => 0.36,
2893            PerpetratorLevel::OwnerExecutive => 0.22,
2894        }
2895    }
2896
2897    /// Returns the typical median loss by position level.
2898    pub fn typical_median_loss(&self) -> Decimal {
2899        match self {
2900            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2901            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2902            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2903        }
2904    }
2905}
2906
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// The `*_distribution` maps are keyed by lowercase string labels and hold
/// fractional rates (e.g. `0.42` for 42%).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Fraction of frauds involving collusion (expected in `0.0..=1.0`).
    pub collusion_rate: f64,
}
2930
2931impl Default for AcfeCalibration {
2932    fn default() -> Self {
2933        let mut category_distribution = HashMap::new();
2934        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2935        category_distribution.insert("corruption".to_string(), 0.33);
2936        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2937
2938        let mut detection_method_distribution = HashMap::new();
2939        for method in AcfeDetectionMethod::all_variants() {
2940            detection_method_distribution.insert(
2941                format!("{:?}", method).to_lowercase(),
2942                method.typical_detection_rate(),
2943            );
2944        }
2945
2946        let mut department_distribution = HashMap::new();
2947        department_distribution.insert("accounting".to_string(), 0.21);
2948        department_distribution.insert("operations".to_string(), 0.17);
2949        department_distribution.insert("executive".to_string(), 0.12);
2950        department_distribution.insert("sales".to_string(), 0.11);
2951        department_distribution.insert("customer_service".to_string(), 0.07);
2952        department_distribution.insert("purchasing".to_string(), 0.06);
2953        department_distribution.insert("other".to_string(), 0.26);
2954
2955        let mut level_distribution = HashMap::new();
2956        level_distribution.insert("employee".to_string(), 0.42);
2957        level_distribution.insert("manager".to_string(), 0.36);
2958        level_distribution.insert("owner_executive".to_string(), 0.22);
2959
2960        Self {
2961            median_loss: Decimal::new(117_000, 0),
2962            median_duration_months: 12,
2963            category_distribution,
2964            detection_method_distribution,
2965            department_distribution,
2966            level_distribution,
2967            avg_red_flags_per_case: 2.8,
2968            collusion_rate: 0.50,
2969        }
2970    }
2971}
2972
2973impl AcfeCalibration {
2974    /// Creates a new ACFE calibration with the given parameters.
2975    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
2976        Self {
2977            median_loss,
2978            median_duration_months,
2979            ..Self::default()
2980        }
2981    }
2982
2983    /// Returns the median loss for a specific category.
2984    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
2985        category.typical_median_loss()
2986    }
2987
2988    /// Returns the median duration for a specific category.
2989    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
2990        category.typical_detection_months()
2991    }
2992
2993    /// Validates the calibration data.
2994    pub fn validate(&self) -> Result<(), String> {
2995        if self.median_loss <= Decimal::ZERO {
2996            return Err("Median loss must be positive".to_string());
2997        }
2998        if self.median_duration_months == 0 {
2999            return Err("Median duration must be at least 1 month".to_string());
3000        }
3001        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3002            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3003        }
3004        Ok(())
3005    }
3006}
3007
/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
///
/// The fraud triangle is a model for explaining the factors that cause
/// someone to commit occupational fraud.
///
/// A triangle carries exactly one pressure and one rationalization, but any
/// number of opportunity factors (possibly none).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FraudTriangle {
    /// Pressure or incentive to commit fraud.
    pub pressure: PressureType,
    /// Opportunity factors that enable fraud.
    pub opportunities: Vec<OpportunityFactor>,
    /// Rationalization used to justify the fraud.
    pub rationalization: Rationalization,
}
3021
3022impl FraudTriangle {
3023    /// Creates a new fraud triangle.
3024    pub fn new(
3025        pressure: PressureType,
3026        opportunities: Vec<OpportunityFactor>,
3027        rationalization: Rationalization,
3028    ) -> Self {
3029        Self {
3030            pressure,
3031            opportunities,
3032            rationalization,
3033        }
3034    }
3035
3036    /// Returns a risk score based on the fraud triangle components.
3037    pub fn risk_score(&self) -> f64 {
3038        let pressure_score = self.pressure.risk_weight();
3039        let opportunity_score: f64 = self
3040            .opportunities
3041            .iter()
3042            .map(|o| o.risk_weight())
3043            .sum::<f64>()
3044            / self.opportunities.len().max(1) as f64;
3045        let rationalization_score = self.rationalization.risk_weight();
3046
3047        (pressure_score + opportunity_score + rationalization_score) / 3.0
3048    }
3049}
3050
/// Types of pressure/incentive that can lead to fraud.
///
/// Variants are grouped into financial and non-financial pressures; each has
/// an associated weight available through [`PressureType::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PressureType {
    // Financial Pressures
    /// Personal financial difficulties (debt, lifestyle beyond means).
    PersonalFinancialDifficulties,
    /// Pressure to meet financial targets/earnings expectations.
    FinancialTargets,
    /// Market or analyst expectations.
    MarketExpectations,
    /// Debt covenant compliance requirements.
    CovenantCompliance,
    /// Credit rating maintenance.
    CreditRatingMaintenance,
    /// Acquisition/merger valuation pressure.
    AcquisitionValuation,

    // Non-Financial Pressures
    /// Fear of job loss.
    JobSecurity,
    /// Pressure to maintain status or image.
    StatusMaintenance,
    /// Gambling addiction.
    GamblingAddiction,
    /// Substance abuse issues.
    SubstanceAbuse,
    /// Family pressure or obligations.
    FamilyPressure,
    /// Greed or desire for more.
    Greed,
}
3082
3083impl PressureType {
3084    /// Returns the risk weight (0.0-1.0) for this pressure type.
3085    pub fn risk_weight(&self) -> f64 {
3086        match self {
3087            PressureType::PersonalFinancialDifficulties => 0.80,
3088            PressureType::FinancialTargets => 0.75,
3089            PressureType::MarketExpectations => 0.70,
3090            PressureType::CovenantCompliance => 0.85,
3091            PressureType::CreditRatingMaintenance => 0.70,
3092            PressureType::AcquisitionValuation => 0.75,
3093            PressureType::JobSecurity => 0.65,
3094            PressureType::StatusMaintenance => 0.55,
3095            PressureType::GamblingAddiction => 0.90,
3096            PressureType::SubstanceAbuse => 0.85,
3097            PressureType::FamilyPressure => 0.60,
3098            PressureType::Greed => 0.70,
3099        }
3100    }
3101}
3102
/// Opportunity factors that enable fraud.
///
/// Each factor has an associated weight available through
/// [`OpportunityFactor::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OpportunityFactor {
    /// Weak internal controls.
    WeakInternalControls,
    /// Lack of segregation of duties.
    LackOfSegregation,
    /// Management's ability to override controls.
    ManagementOverride,
    /// Complex or unusual transactions.
    ComplexTransactions,
    /// Related party transactions.
    RelatedPartyTransactions,
    /// Poor tone at the top.
    PoorToneAtTop,
    /// Inadequate supervision.
    InadequateSupervision,
    /// Access to assets without accountability.
    AssetAccess,
    /// Inadequate record keeping.
    PoorRecordKeeping,
    /// Failure to discipline fraud perpetrators.
    LackOfDiscipline,
    /// Lack of independent checks.
    LackOfIndependentChecks,
}
3129
3130impl OpportunityFactor {
3131    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3132    pub fn risk_weight(&self) -> f64 {
3133        match self {
3134            OpportunityFactor::WeakInternalControls => 0.85,
3135            OpportunityFactor::LackOfSegregation => 0.80,
3136            OpportunityFactor::ManagementOverride => 0.90,
3137            OpportunityFactor::ComplexTransactions => 0.70,
3138            OpportunityFactor::RelatedPartyTransactions => 0.75,
3139            OpportunityFactor::PoorToneAtTop => 0.85,
3140            OpportunityFactor::InadequateSupervision => 0.75,
3141            OpportunityFactor::AssetAccess => 0.70,
3142            OpportunityFactor::PoorRecordKeeping => 0.65,
3143            OpportunityFactor::LackOfDiscipline => 0.60,
3144            OpportunityFactor::LackOfIndependentChecks => 0.75,
3145        }
3146    }
3147}
3148
/// Rationalizations used by fraud perpetrators.
///
/// Each variant is documented with the characteristic statement it stands
/// for; its weight is available through [`Rationalization::risk_weight`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Rationalization {
    /// "I'm just borrowing; I'll pay it back."
    TemporaryBorrowing,
    /// "Everyone does it."
    EveryoneDoesIt,
    /// "It's for the good of the company."
    ForTheCompanyGood,
    /// "I deserve this; the company owes me."
    Entitlement,
    /// "I was just following orders."
    FollowingOrders,
    /// "They won't miss it; they have plenty."
    TheyWontMissIt,
    /// "I need it more than they do."
    NeedItMore,
    /// "It's not really stealing."
    NotReallyStealing,
    /// "I'm underpaid for what I do."
    Underpaid,
    /// "It's a victimless crime."
    VictimlessCrime,
}
3173
3174impl Rationalization {
3175    /// Returns the risk weight (0.0-1.0) for this rationalization.
3176    pub fn risk_weight(&self) -> f64 {
3177        match self {
3178            // More dangerous rationalizations
3179            Rationalization::Entitlement => 0.85,
3180            Rationalization::EveryoneDoesIt => 0.80,
3181            Rationalization::NotReallyStealing => 0.80,
3182            Rationalization::TheyWontMissIt => 0.75,
3183            // Medium risk
3184            Rationalization::Underpaid => 0.70,
3185            Rationalization::ForTheCompanyGood => 0.65,
3186            Rationalization::NeedItMore => 0.65,
3187            // Lower risk (still indicates fraud)
3188            Rationalization::TemporaryBorrowing => 0.60,
3189            Rationalization::FollowingOrders => 0.55,
3190            Rationalization::VictimlessCrime => 0.60,
3191        }
3192    }
3193}
3194
3195// ============================================================================
3196// NEAR-MISS TYPES
3197// ============================================================================
3198
/// Type of near-miss pattern (suspicious but legitimate).
///
/// Each variant carries the details that make the case look suspicious,
/// such as the related transaction, the threshold involved, or the
/// correction that followed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NearMissPattern {
    /// Transaction very similar to another (possible duplicate but legitimate).
    NearDuplicate {
        /// Number of days between this transaction and the similar one.
        date_difference_days: u32,
        /// ID of the transaction this one resembles.
        similar_transaction_id: String,
    },
    /// Amount just below approval threshold (but legitimate).
    ThresholdProximity {
        /// The threshold being approached.
        threshold: Decimal,
        /// Amount as a fraction of the threshold (0.0-1.0).
        proximity: f64,
    },
    /// Unusual but legitimate business pattern.
    UnusualLegitimate {
        /// Type of legitimate pattern.
        pattern_type: LegitimatePatternType,
        /// Business justification.
        justification: String,
    },
    /// Error that was caught and corrected.
    CorrectedError {
        /// Days until correction.
        correction_lag_days: u32,
        /// Correction document ID.
        correction_document_id: String,
    },
}
3231
/// Types of unusual but legitimate business patterns.
///
/// A human-readable label for each variant is available through
/// [`LegitimatePatternType::description`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LegitimatePatternType {
    /// Year-end bonus payment.
    YearEndBonus,
    /// Contract prepayment.
    ContractPrepayment,
    /// Settlement payment.
    SettlementPayment,
    /// Insurance claim.
    InsuranceClaim,
    /// One-time vendor payment.
    OneTimePayment,
    /// Asset disposal.
    AssetDisposal,
    /// Seasonal inventory buildup.
    SeasonalInventory,
    /// Promotional spending.
    PromotionalSpending,
}
3252
3253impl LegitimatePatternType {
3254    /// Returns a description of this pattern type.
3255    pub fn description(&self) -> &'static str {
3256        match self {
3257            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3258            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3259            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3260            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3261            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3262            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3263            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3264            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3265        }
3266    }
3267}
3268
/// What might trigger a false positive for this near-miss.
///
/// Stored in the `false_positive_trigger` field of [`NearMissLabel`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FalsePositiveTrigger {
    /// Amount is near threshold.
    AmountNearThreshold,
    /// Timing is unusual.
    UnusualTiming,
    /// Similar to existing transaction.
    SimilarTransaction,
    /// New counterparty.
    NewCounterparty,
    /// Account combination unusual.
    UnusualAccountCombination,
    /// Volume spike.
    VolumeSpike,
    /// Round amount.
    RoundAmount,
}
3287
/// Label for a near-miss case.
///
/// Combines the affected document, the [`NearMissPattern`] it exhibits, and the
/// reason a detector could wrongly flag it. Build one with [`NearMissLabel::new`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NearMissLabel {
    /// Document ID.
    pub document_id: String,
    /// The near-miss pattern.
    pub pattern: NearMissPattern,
    /// How suspicious it appears (0.0-1.0).
    ///
    /// [`NearMissLabel::new`] clamps its input to this range; the field is public,
    /// so direct assignment is not range-checked.
    pub suspicion_score: f64,
    /// What would trigger a false positive.
    pub false_positive_trigger: FalsePositiveTrigger,
    /// Why this is actually legitimate.
    pub explanation: String,
}
3302
3303impl NearMissLabel {
3304    /// Creates a new near-miss label.
3305    pub fn new(
3306        document_id: impl Into<String>,
3307        pattern: NearMissPattern,
3308        suspicion_score: f64,
3309        trigger: FalsePositiveTrigger,
3310        explanation: impl Into<String>,
3311    ) -> Self {
3312        Self {
3313            document_id: document_id.into(),
3314            pattern,
3315            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3316            false_positive_trigger: trigger,
3317            explanation: explanation.into(),
3318        }
3319    }
3320}
3321
/// Configuration for anomaly rates.
///
/// `total_rate` is the share of all records that are anomalous; the remaining
/// fields split those anomalies across categories. [`AnomalyRateConfig::validate`]
/// checks that the five category rates sum to 1.0 (within 0.01) and that
/// `total_rate` lies between 0.0 and 1.0.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate (0.0 - 1.0).
    pub total_rate: f64,
    /// Fraud rate as proportion of anomalies.
    pub fraud_rate: f64,
    /// Error rate as proportion of anomalies.
    pub error_rate: f64,
    /// Process issue rate as proportion of anomalies.
    pub process_issue_rate: f64,
    /// Statistical anomaly rate as proportion of anomalies.
    pub statistical_rate: f64,
    /// Relational anomaly rate as proportion of anomalies.
    pub relational_rate: f64,
}
3338
3339impl Default for AnomalyRateConfig {
3340    fn default() -> Self {
3341        Self {
3342            total_rate: 0.02,         // 2% of transactions are anomalous
3343            fraud_rate: 0.25,         // 25% of anomalies are fraud
3344            error_rate: 0.35,         // 35% of anomalies are errors
3345            process_issue_rate: 0.20, // 20% are process issues
3346            statistical_rate: 0.15,   // 15% are statistical
3347            relational_rate: 0.05,    // 5% are relational
3348        }
3349    }
3350}
3351
3352impl AnomalyRateConfig {
3353    /// Validates that rates sum to approximately 1.0.
3354    pub fn validate(&self) -> Result<(), String> {
3355        let sum = self.fraud_rate
3356            + self.error_rate
3357            + self.process_issue_rate
3358            + self.statistical_rate
3359            + self.relational_rate;
3360
3361        if (sum - 1.0).abs() > 0.01 {
3362            return Err(format!(
3363                "Anomaly category rates must sum to 1.0, got {}",
3364                sum
3365            ));
3366        }
3367
3368        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3369            return Err(format!(
3370                "Total rate must be between 0.0 and 1.0, got {}",
3371                self.total_rate
3372            ));
3373        }
3374
3375        Ok(())
3376    }
3377}
3378
3379#[cfg(test)]
3380#[allow(clippy::unwrap_used)]
3381mod tests {
3382    use super::*;
3383    use rust_decimal_macros::dec;
3384
3385    #[test]
3386    fn test_anomaly_type_category() {
3387        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3388        assert_eq!(fraud.category(), "Fraud");
3389        assert!(fraud.is_intentional());
3390
3391        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3392        assert_eq!(error.category(), "Error");
3393        assert!(!error.is_intentional());
3394    }
3395
3396    #[test]
3397    fn test_labeled_anomaly() {
3398        let anomaly = LabeledAnomaly::new(
3399            "ANO001".to_string(),
3400            AnomalyType::Fraud(FraudType::SelfApproval),
3401            "JE001".to_string(),
3402            "JE".to_string(),
3403            "1000".to_string(),
3404            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3405        )
3406        .with_description("User approved their own expense report")
3407        .with_related_entity("USER001");
3408
3409        assert_eq!(anomaly.severity, 3);
3410        assert!(anomaly.is_injected);
3411        assert_eq!(anomaly.related_entities.len(), 1);
3412    }
3413
3414    #[test]
3415    fn test_labeled_anomaly_with_provenance() {
3416        let anomaly = LabeledAnomaly::new(
3417            "ANO001".to_string(),
3418            AnomalyType::Fraud(FraudType::SelfApproval),
3419            "JE001".to_string(),
3420            "JE".to_string(),
3421            "1000".to_string(),
3422            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3423        )
3424        .with_run_id("run-123")
3425        .with_generation_seed(42)
3426        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3427        .with_structured_strategy(InjectionStrategy::SelfApproval {
3428            user_id: "USER001".to_string(),
3429        })
3430        .with_scenario("scenario-001")
3431        .with_original_document_hash("abc123");
3432
3433        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3434        assert_eq!(anomaly.generation_seed, Some(42));
3435        assert!(anomaly.causal_reason.is_some());
3436        assert!(anomaly.structured_strategy.is_some());
3437        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3438        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3439
3440        // Check that legacy injection_strategy is also set
3441        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3442    }
3443
3444    #[test]
3445    fn test_labeled_anomaly_derivation_chain() {
3446        let parent = LabeledAnomaly::new(
3447            "ANO001".to_string(),
3448            AnomalyType::Fraud(FraudType::DuplicatePayment),
3449            "JE001".to_string(),
3450            "JE".to_string(),
3451            "1000".to_string(),
3452            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3453        );
3454
3455        let child = LabeledAnomaly::new(
3456            "ANO002".to_string(),
3457            AnomalyType::Error(ErrorType::DuplicateEntry),
3458            "JE002".to_string(),
3459            "JE".to_string(),
3460            "1000".to_string(),
3461            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3462        )
3463        .with_parent_anomaly(&parent.anomaly_id);
3464
3465        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3466    }
3467
3468    #[test]
3469    fn test_injection_strategy_description() {
3470        let strategy = InjectionStrategy::AmountManipulation {
3471            original: dec!(1000),
3472            factor: 2.5,
3473        };
3474        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3475        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3476
3477        let strategy = InjectionStrategy::ThresholdAvoidance {
3478            threshold: dec!(10000),
3479            adjusted_amount: dec!(9999),
3480        };
3481        assert_eq!(
3482            strategy.description(),
3483            "Amount adjusted to avoid 10000 threshold"
3484        );
3485
3486        let strategy = InjectionStrategy::DateShift {
3487            days_shifted: -5,
3488            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3489        };
3490        assert_eq!(strategy.description(), "Date backdated by 5 days");
3491
3492        let strategy = InjectionStrategy::DateShift {
3493            days_shifted: 3,
3494            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3495        };
3496        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3497    }
3498
3499    #[test]
3500    fn test_causal_reason_variants() {
3501        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3502        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3503            assert!((base_rate - 0.02).abs() < 0.001);
3504        }
3505
3506        let reason = AnomalyCausalReason::TemporalPattern {
3507            pattern_name: "year_end_spike".to_string(),
3508        };
3509        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3510            assert_eq!(pattern_name, "year_end_spike");
3511        }
3512
3513        let reason = AnomalyCausalReason::ScenarioStep {
3514            scenario_type: "kickback".to_string(),
3515            step_number: 3,
3516        };
3517        if let AnomalyCausalReason::ScenarioStep {
3518            scenario_type,
3519            step_number,
3520        } = reason
3521        {
3522            assert_eq!(scenario_type, "kickback");
3523            assert_eq!(step_number, 3);
3524        }
3525    }
3526
3527    #[test]
3528    fn test_feature_vector_length() {
3529        let anomaly = LabeledAnomaly::new(
3530            "ANO001".to_string(),
3531            AnomalyType::Fraud(FraudType::SelfApproval),
3532            "JE001".to_string(),
3533            "JE".to_string(),
3534            "1000".to_string(),
3535            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3536        );
3537
3538        let features = anomaly.to_features();
3539        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3540        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3541    }
3542
3543    #[test]
3544    fn test_feature_vector_with_provenance() {
3545        let anomaly = LabeledAnomaly::new(
3546            "ANO001".to_string(),
3547            AnomalyType::Fraud(FraudType::SelfApproval),
3548            "JE001".to_string(),
3549            "JE".to_string(),
3550            "1000".to_string(),
3551            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3552        )
3553        .with_scenario("scenario-001")
3554        .with_parent_anomaly("ANO000");
3555
3556        let features = anomaly.to_features();
3557
3558        // Last two features should be 1.0 (has scenario, has parent)
3559        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3560        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3561    }
3562
3563    #[test]
3564    fn test_anomaly_summary() {
3565        let anomalies = vec![
3566            LabeledAnomaly::new(
3567                "ANO001".to_string(),
3568                AnomalyType::Fraud(FraudType::SelfApproval),
3569                "JE001".to_string(),
3570                "JE".to_string(),
3571                "1000".to_string(),
3572                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3573            ),
3574            LabeledAnomaly::new(
3575                "ANO002".to_string(),
3576                AnomalyType::Error(ErrorType::DuplicateEntry),
3577                "JE002".to_string(),
3578                "JE".to_string(),
3579                "1000".to_string(),
3580                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3581            ),
3582        ];
3583
3584        let summary = AnomalySummary::from_anomalies(&anomalies);
3585
3586        assert_eq!(summary.total_count, 2);
3587        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3588        assert_eq!(summary.by_category.get("Error"), Some(&1));
3589    }
3590
3591    #[test]
3592    fn test_rate_config_validation() {
3593        let config = AnomalyRateConfig::default();
3594        assert!(config.validate().is_ok());
3595
3596        let bad_config = AnomalyRateConfig {
3597            fraud_rate: 0.5,
3598            error_rate: 0.5,
3599            process_issue_rate: 0.5, // Sum > 1.0
3600            ..Default::default()
3601        };
3602        assert!(bad_config.validate().is_err());
3603    }
3604
3605    #[test]
3606    fn test_injection_strategy_serialization() {
3607        let strategy = InjectionStrategy::SoDViolation {
3608            duty1: "CreatePO".to_string(),
3609            duty2: "ApprovePO".to_string(),
3610            violating_user: "USER001".to_string(),
3611        };
3612
3613        let json = serde_json::to_string(&strategy).unwrap();
3614        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3615
3616        assert_eq!(strategy, deserialized);
3617    }
3618
3619    #[test]
3620    fn test_labeled_anomaly_serialization_with_provenance() {
3621        let anomaly = LabeledAnomaly::new(
3622            "ANO001".to_string(),
3623            AnomalyType::Fraud(FraudType::SelfApproval),
3624            "JE001".to_string(),
3625            "JE".to_string(),
3626            "1000".to_string(),
3627            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3628        )
3629        .with_run_id("run-123")
3630        .with_generation_seed(42)
3631        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3632
3633        let json = serde_json::to_string(&anomaly).unwrap();
3634        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3635
3636        assert_eq!(anomaly.run_id, deserialized.run_id);
3637        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3638    }
3639
3640    // ========================================
3641    // FR-003 ENHANCED TAXONOMY TESTS
3642    // ========================================
3643
3644    #[test]
3645    fn test_anomaly_category_from_anomaly_type() {
3646        // Fraud mappings
3647        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3648        assert_eq!(
3649            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3650            AnomalyCategory::FictitiousVendor
3651        );
3652
3653        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3654        assert_eq!(
3655            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3656            AnomalyCategory::VendorKickback
3657        );
3658
3659        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3660        assert_eq!(
3661            AnomalyCategory::from_anomaly_type(&fraud_structured),
3662            AnomalyCategory::StructuredTransaction
3663        );
3664
3665        // Error mappings
3666        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3667        assert_eq!(
3668            AnomalyCategory::from_anomaly_type(&error_duplicate),
3669            AnomalyCategory::DuplicatePayment
3670        );
3671
3672        // Process issue mappings
3673        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3674        assert_eq!(
3675            AnomalyCategory::from_anomaly_type(&process_skip),
3676            AnomalyCategory::MissingApproval
3677        );
3678
3679        // Relational mappings
3680        let relational_circular =
3681            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3682        assert_eq!(
3683            AnomalyCategory::from_anomaly_type(&relational_circular),
3684            AnomalyCategory::CircularFlow
3685        );
3686    }
3687
3688    #[test]
3689    fn test_anomaly_category_ordinal() {
3690        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3691        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3692        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3693    }
3694
3695    #[test]
3696    fn test_contributing_factor() {
3697        let factor = ContributingFactor::new(
3698            FactorType::AmountDeviation,
3699            15000.0,
3700            10000.0,
3701            true,
3702            0.5,
3703            "Amount exceeds threshold",
3704        );
3705
3706        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3707        assert_eq!(factor.value, 15000.0);
3708        assert_eq!(factor.threshold, 10000.0);
3709        assert!(factor.direction_greater);
3710
3711        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3712        let contribution = factor.contribution();
3713        assert!((contribution - 0.25).abs() < 0.01);
3714    }
3715
3716    #[test]
3717    fn test_contributing_factor_with_evidence() {
3718        let mut data = HashMap::new();
3719        data.insert("expected".to_string(), "10000".to_string());
3720        data.insert("actual".to_string(), "15000".to_string());
3721
3722        let factor = ContributingFactor::new(
3723            FactorType::AmountDeviation,
3724            15000.0,
3725            10000.0,
3726            true,
3727            0.5,
3728            "Amount deviation detected",
3729        )
3730        .with_evidence("transaction_history", data);
3731
3732        assert!(factor.evidence.is_some());
3733        let evidence = factor.evidence.unwrap();
3734        assert_eq!(evidence.source, "transaction_history");
3735        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3736    }
3737
3738    #[test]
3739    fn test_enhanced_anomaly_label() {
3740        let base = LabeledAnomaly::new(
3741            "ANO001".to_string(),
3742            AnomalyType::Fraud(FraudType::DuplicatePayment),
3743            "JE001".to_string(),
3744            "JE".to_string(),
3745            "1000".to_string(),
3746            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3747        );
3748
3749        let enhanced = EnhancedAnomalyLabel::from_base(base)
3750            .with_confidence(0.85)
3751            .with_severity(0.7)
3752            .with_factor(ContributingFactor::new(
3753                FactorType::DuplicateIndicator,
3754                1.0,
3755                0.5,
3756                true,
3757                0.4,
3758                "Duplicate payment detected",
3759            ))
3760            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3761
3762        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3763        assert_eq!(enhanced.enhanced_confidence, 0.85);
3764        assert_eq!(enhanced.enhanced_severity, 0.7);
3765        assert_eq!(enhanced.contributing_factors.len(), 1);
3766        assert_eq!(enhanced.secondary_categories.len(), 1);
3767    }
3768
3769    #[test]
3770    fn test_enhanced_anomaly_label_features() {
3771        let base = LabeledAnomaly::new(
3772            "ANO001".to_string(),
3773            AnomalyType::Fraud(FraudType::SelfApproval),
3774            "JE001".to_string(),
3775            "JE".to_string(),
3776            "1000".to_string(),
3777            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3778        );
3779
3780        let enhanced = EnhancedAnomalyLabel::from_base(base)
3781            .with_confidence(0.9)
3782            .with_severity(0.8)
3783            .with_factor(ContributingFactor::new(
3784                FactorType::ControlBypass,
3785                1.0,
3786                0.0,
3787                true,
3788                0.5,
3789                "Control bypass detected",
3790            ));
3791
3792        let features = enhanced.to_features();
3793
3794        // Should have 25 features (15 base + 10 enhanced)
3795        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3796        assert_eq!(features.len(), 25);
3797
3798        // Check enhanced confidence is in features
3799        assert_eq!(features[15], 0.9); // enhanced_confidence
3800
3801        // Check has_control_bypass flag
3802        assert_eq!(features[21], 1.0); // has_control_bypass
3803    }
3804
3805    #[test]
3806    fn test_enhanced_anomaly_label_feature_names() {
3807        let names = EnhancedAnomalyLabel::feature_names();
3808        assert_eq!(names.len(), 25);
3809        assert!(names.contains(&"enhanced_confidence"));
3810        assert!(names.contains(&"enhanced_severity"));
3811        assert!(names.contains(&"has_control_bypass"));
3812    }
3813
3814    #[test]
3815    fn test_factor_type_names() {
3816        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3817        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3818        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3819    }
3820
3821    #[test]
3822    fn test_anomaly_category_serialization() {
3823        let category = AnomalyCategory::CircularFlow;
3824        let json = serde_json::to_string(&category).unwrap();
3825        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3826        assert_eq!(category, deserialized);
3827
3828        let custom = AnomalyCategory::Custom("custom_type".to_string());
3829        let json = serde_json::to_string(&custom).unwrap();
3830        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3831        assert_eq!(custom, deserialized);
3832    }
3833
3834    #[test]
3835    fn test_enhanced_label_secondary_category_dedup() {
3836        let base = LabeledAnomaly::new(
3837            "ANO001".to_string(),
3838            AnomalyType::Fraud(FraudType::DuplicatePayment),
3839            "JE001".to_string(),
3840            "JE".to_string(),
3841            "1000".to_string(),
3842            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3843        );
3844
3845        let enhanced = EnhancedAnomalyLabel::from_base(base)
3846            // Try to add the primary category as secondary (should be ignored)
3847            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3848            // Add a valid secondary
3849            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3850            // Try to add duplicate secondary (should be ignored)
3851            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3852
3853        // Should only have 1 secondary category (TimingAnomaly)
3854        assert_eq!(enhanced.secondary_categories.len(), 1);
3855        assert_eq!(
3856            enhanced.secondary_categories[0],
3857            AnomalyCategory::TimingAnomaly
3858        );
3859    }
3860
3861    // ==========================================================================
3862    // Accounting Standards Fraud Type Tests
3863    // ==========================================================================
3864
3865    #[test]
3866    fn test_revenue_recognition_fraud_types() {
3867        // Test ASC 606/IFRS 15 related fraud types
3868        let fraud_types = [
3869            FraudType::ImproperRevenueRecognition,
3870            FraudType::ImproperPoAllocation,
3871            FraudType::VariableConsiderationManipulation,
3872            FraudType::ContractModificationMisstatement,
3873        ];
3874
3875        for fraud_type in fraud_types {
3876            let anomaly_type = AnomalyType::Fraud(fraud_type);
3877            assert_eq!(anomaly_type.category(), "Fraud");
3878            assert!(anomaly_type.is_intentional());
3879            assert!(anomaly_type.severity() >= 3);
3880        }
3881    }
3882
3883    #[test]
3884    fn test_lease_accounting_fraud_types() {
3885        // Test ASC 842/IFRS 16 related fraud types
3886        let fraud_types = [
3887            FraudType::LeaseClassificationManipulation,
3888            FraudType::OffBalanceSheetLease,
3889            FraudType::LeaseLiabilityUnderstatement,
3890            FraudType::RouAssetMisstatement,
3891        ];
3892
3893        for fraud_type in fraud_types {
3894            let anomaly_type = AnomalyType::Fraud(fraud_type);
3895            assert_eq!(anomaly_type.category(), "Fraud");
3896            assert!(anomaly_type.is_intentional());
3897            assert!(anomaly_type.severity() >= 3);
3898        }
3899
3900        // Off-balance sheet lease fraud should be high severity
3901        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3902    }
3903
3904    #[test]
3905    fn test_fair_value_fraud_types() {
3906        // Test ASC 820/IFRS 13 related fraud types
3907        let fraud_types = [
3908            FraudType::FairValueHierarchyManipulation,
3909            FraudType::Level3InputManipulation,
3910            FraudType::ValuationTechniqueManipulation,
3911        ];
3912
3913        for fraud_type in fraud_types {
3914            let anomaly_type = AnomalyType::Fraud(fraud_type);
3915            assert_eq!(anomaly_type.category(), "Fraud");
3916            assert!(anomaly_type.is_intentional());
3917            assert!(anomaly_type.severity() >= 4);
3918        }
3919
3920        // Level 3 manipulation is highest severity (unobservable inputs)
3921        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3922    }
3923
3924    #[test]
3925    fn test_impairment_fraud_types() {
3926        // Test ASC 360/IAS 36 related fraud types
3927        let fraud_types = [
3928            FraudType::DelayedImpairment,
3929            FraudType::ImpairmentTestAvoidance,
3930            FraudType::CashFlowProjectionManipulation,
3931            FraudType::ImproperImpairmentReversal,
3932        ];
3933
3934        for fraud_type in fraud_types {
3935            let anomaly_type = AnomalyType::Fraud(fraud_type);
3936            assert_eq!(anomaly_type.category(), "Fraud");
3937            assert!(anomaly_type.is_intentional());
3938            assert!(anomaly_type.severity() >= 3);
3939        }
3940
3941        // Cash flow manipulation has highest severity
3942        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3943    }
3944
3945    // ==========================================================================
3946    // Accounting Standards Error Type Tests
3947    // ==========================================================================
3948
3949    #[test]
3950    fn test_standards_error_types() {
3951        // Test non-fraudulent accounting standards errors
3952        let error_types = [
3953            ErrorType::RevenueTimingError,
3954            ErrorType::PoAllocationError,
3955            ErrorType::LeaseClassificationError,
3956            ErrorType::LeaseCalculationError,
3957            ErrorType::FairValueError,
3958            ErrorType::ImpairmentCalculationError,
3959            ErrorType::DiscountRateError,
3960            ErrorType::FrameworkApplicationError,
3961        ];
3962
3963        for error_type in error_types {
3964            let anomaly_type = AnomalyType::Error(error_type);
3965            assert_eq!(anomaly_type.category(), "Error");
3966            assert!(!anomaly_type.is_intentional());
3967            assert!(anomaly_type.severity() >= 3);
3968        }
3969    }
3970
3971    #[test]
3972    fn test_framework_application_error() {
3973        // Test IFRS vs GAAP confusion errors
3974        let error_type = ErrorType::FrameworkApplicationError;
3975        assert_eq!(error_type.severity(), 4);
3976
3977        let anomaly = LabeledAnomaly::new(
3978            "ERR001".to_string(),
3979            AnomalyType::Error(error_type),
3980            "JE100".to_string(),
3981            "JE".to_string(),
3982            "1000".to_string(),
3983            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
3984        )
3985        .with_description("LIFO inventory method used under IFRS (not permitted)")
3986        .with_metadata("framework", "IFRS")
3987        .with_metadata("standard_violated", "IAS 2");
3988
3989        assert_eq!(anomaly.anomaly_type.category(), "Error");
3990        assert_eq!(
3991            anomaly.metadata.get("standard_violated"),
3992            Some(&"IAS 2".to_string())
3993        );
3994    }
3995
3996    #[test]
3997    fn test_standards_anomaly_serialization() {
3998        // Test that new fraud types serialize/deserialize correctly
3999        let fraud_types = [
4000            FraudType::ImproperRevenueRecognition,
4001            FraudType::LeaseClassificationManipulation,
4002            FraudType::FairValueHierarchyManipulation,
4003            FraudType::DelayedImpairment,
4004        ];
4005
4006        for fraud_type in fraud_types {
4007            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4008            let deserialized: FraudType =
4009                serde_json::from_str(&json).expect("Failed to deserialize");
4010            assert_eq!(fraud_type, deserialized);
4011        }
4012
4013        // Test error types
4014        let error_types = [
4015            ErrorType::RevenueTimingError,
4016            ErrorType::LeaseCalculationError,
4017            ErrorType::FairValueError,
4018            ErrorType::FrameworkApplicationError,
4019        ];
4020
4021        for error_type in error_types {
4022            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4023            let deserialized: ErrorType =
4024                serde_json::from_str(&json).expect("Failed to deserialize");
4025            assert_eq!(error_type, deserialized);
4026        }
4027    }
4028
4029    #[test]
4030    fn test_standards_labeled_anomaly() {
4031        // Test creating a labeled anomaly for a standards violation
4032        let anomaly = LabeledAnomaly::new(
4033            "STD001".to_string(),
4034            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4035            "CONTRACT-2024-001".to_string(),
4036            "Revenue".to_string(),
4037            "1000".to_string(),
4038            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4039        )
4040        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4041        .with_monetary_impact(dec!(500000))
4042        .with_metadata("standard", "ASC 606")
4043        .with_metadata("paragraph", "606-10-25-1")
4044        .with_metadata("contract_id", "C-2024-001")
4045        .with_related_entity("CONTRACT-2024-001")
4046        .with_related_entity("CUSTOMER-500");
4047
4048        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4049        assert!(anomaly.is_injected);
4050        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4051        assert_eq!(anomaly.related_entities.len(), 2);
4052        assert_eq!(
4053            anomaly.metadata.get("standard"),
4054            Some(&"ASC 606".to_string())
4055        );
4056    }
4057
4058    // ==========================================================================
4059    // Multi-Dimensional Labeling Tests
4060    // ==========================================================================
4061
4062    #[test]
4063    fn test_severity_level() {
4064        assert_eq!(SeverityLevel::Low.numeric(), 1);
4065        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4066
4067        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4068        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4069
4070        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4071        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4072
4073        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4074    }
4075
4076    #[test]
4077    fn test_anomaly_severity() {
4078        let severity =
4079            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4080
4081        assert_eq!(severity.level, SeverityLevel::High);
4082        assert!(severity.is_material);
4083        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4084
4085        // Not material
4086        let low_severity =
4087            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4088        assert!(!low_severity.is_material);
4089    }
4090
4091    #[test]
4092    fn test_detection_difficulty() {
4093        assert!(
4094            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4095        );
4096        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4097
4098        assert_eq!(
4099            AnomalyDetectionDifficulty::from_score(0.05),
4100            AnomalyDetectionDifficulty::Trivial
4101        );
4102        assert_eq!(
4103            AnomalyDetectionDifficulty::from_score(0.90),
4104            AnomalyDetectionDifficulty::Expert
4105        );
4106
4107        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4108    }
4109
4110    #[test]
4111    fn test_ground_truth_certainty() {
4112        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4113        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4114        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4115    }
4116
4117    #[test]
4118    fn test_detection_method() {
4119        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4120        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4121    }
4122
4123    #[test]
4124    fn test_extended_anomaly_label() {
4125        let base = LabeledAnomaly::new(
4126            "ANO001".to_string(),
4127            AnomalyType::Fraud(FraudType::FictitiousVendor),
4128            "JE001".to_string(),
4129            "JE".to_string(),
4130            "1000".to_string(),
4131            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4132        )
4133        .with_monetary_impact(dec!(100000));
4134
4135        let extended = ExtendedAnomalyLabel::from_base(base)
4136            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4137            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4138            .with_method(DetectionMethod::GraphBased)
4139            .with_method(DetectionMethod::ForensicAudit)
4140            .with_indicator("New vendor with no history")
4141            .with_indicator("Large first transaction")
4142            .with_certainty(GroundTruthCertainty::Definite)
4143            .with_entity("V001")
4144            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4145            .with_scheme("SCHEME001", 2);
4146
4147        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4148        assert_eq!(
4149            extended.detection_difficulty,
4150            AnomalyDetectionDifficulty::Hard
4151        );
4152        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4153        assert_eq!(extended.recommended_methods.len(), 3);
4154        assert_eq!(extended.key_indicators.len(), 2);
4155        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4156        assert_eq!(extended.scheme_stage, Some(2));
4157    }
4158
4159    #[test]
4160    fn test_extended_anomaly_label_features() {
4161        let base = LabeledAnomaly::new(
4162            "ANO001".to_string(),
4163            AnomalyType::Fraud(FraudType::SelfApproval),
4164            "JE001".to_string(),
4165            "JE".to_string(),
4166            "1000".to_string(),
4167            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4168        );
4169
4170        let extended =
4171            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4172
4173        let features = extended.to_features();
4174        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4175        assert_eq!(features.len(), 30);
4176
4177        // Check difficulty score is in features
4178        let difficulty_idx = 18; // Position of difficulty_score
4179        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4180    }
4181
4182    #[test]
4183    fn test_extended_label_near_miss() {
4184        let base = LabeledAnomaly::new(
4185            "ANO001".to_string(),
4186            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4187            "JE001".to_string(),
4188            "JE".to_string(),
4189            "1000".to_string(),
4190            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4191        );
4192
4193        let extended = ExtendedAnomalyLabel::from_base(base)
4194            .as_near_miss("Year-end bonus payment, legitimately high");
4195
4196        assert!(extended.is_near_miss);
4197        assert!(extended.near_miss_explanation.is_some());
4198    }
4199
4200    #[test]
4201    fn test_scheme_type() {
4202        assert_eq!(
4203            SchemeType::GradualEmbezzlement.name(),
4204            "gradual_embezzlement"
4205        );
4206        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4207        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4208    }
4209
4210    #[test]
4211    fn test_concealment_technique() {
4212        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4213        assert!(
4214            ConcealmentTechnique::Collusion.difficulty_bonus()
4215                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4216        );
4217    }
4218
4219    #[test]
4220    fn test_near_miss_label() {
4221        let near_miss = NearMissLabel::new(
4222            "JE001",
4223            NearMissPattern::ThresholdProximity {
4224                threshold: dec!(10000),
4225                proximity: 0.95,
4226            },
4227            0.7,
4228            FalsePositiveTrigger::AmountNearThreshold,
4229            "Transaction is 95% of threshold but business justified",
4230        );
4231
4232        assert_eq!(near_miss.document_id, "JE001");
4233        assert_eq!(near_miss.suspicion_score, 0.7);
4234        assert_eq!(
4235            near_miss.false_positive_trigger,
4236            FalsePositiveTrigger::AmountNearThreshold
4237        );
4238    }
4239
4240    #[test]
4241    fn test_legitimate_pattern_type() {
4242        assert_eq!(
4243            LegitimatePatternType::YearEndBonus.description(),
4244            "Year-end bonus payment"
4245        );
4246        assert_eq!(
4247            LegitimatePatternType::InsuranceClaim.description(),
4248            "Insurance claim reimbursement"
4249        );
4250    }
4251
4252    #[test]
4253    fn test_severity_detection_difficulty_serialization() {
4254        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4255        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4256        let deserialized: AnomalySeverity =
4257            serde_json::from_str(&json).expect("Failed to deserialize");
4258        assert_eq!(severity.level, deserialized.level);
4259
4260        let difficulty = AnomalyDetectionDifficulty::Hard;
4261        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4262        let deserialized: AnomalyDetectionDifficulty =
4263            serde_json::from_str(&json).expect("Failed to deserialize");
4264        assert_eq!(difficulty, deserialized);
4265    }
4266
4267    // ========================================
4268    // ACFE Taxonomy Tests
4269    // ========================================
4270
4271    #[test]
4272    fn test_acfe_fraud_category() {
4273        let asset = AcfeFraudCategory::AssetMisappropriation;
4274        assert_eq!(asset.name(), "asset_misappropriation");
4275        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4276        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4277        assert_eq!(asset.typical_detection_months(), 12);
4278
4279        let corruption = AcfeFraudCategory::Corruption;
4280        assert_eq!(corruption.name(), "corruption");
4281        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4282
4283        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4284        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4285        assert_eq!(fs_fraud.typical_detection_months(), 24);
4286    }
4287
4288    #[test]
4289    fn test_cash_fraud_scheme() {
4290        let shell = CashFraudScheme::ShellCompany;
4291        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4292        assert_eq!(shell.subcategory(), "billing_schemes");
4293        assert_eq!(shell.severity(), 5);
4294        assert_eq!(
4295            shell.detection_difficulty(),
4296            AnomalyDetectionDifficulty::Hard
4297        );
4298
4299        let ghost = CashFraudScheme::GhostEmployee;
4300        assert_eq!(ghost.subcategory(), "payroll_schemes");
4301        assert_eq!(ghost.severity(), 5);
4302
4303        // Test all variants exist
4304        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4305    }
4306
4307    #[test]
4308    fn test_asset_fraud_scheme() {
4309        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4310        assert_eq!(
4311            ip_theft.category(),
4312            AcfeFraudCategory::AssetMisappropriation
4313        );
4314        assert_eq!(ip_theft.subcategory(), "other_assets");
4315        assert_eq!(ip_theft.severity(), 5);
4316
4317        let inv_theft = AssetFraudScheme::InventoryTheft;
4318        assert_eq!(inv_theft.subcategory(), "inventory");
4319        assert_eq!(inv_theft.severity(), 4);
4320    }
4321
4322    #[test]
4323    fn test_corruption_scheme() {
4324        let kickback = CorruptionScheme::InvoiceKickback;
4325        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4326        assert_eq!(kickback.subcategory(), "bribery");
4327        assert_eq!(kickback.severity(), 5);
4328        assert_eq!(
4329            kickback.detection_difficulty(),
4330            AnomalyDetectionDifficulty::Expert
4331        );
4332
4333        let bid_rigging = CorruptionScheme::BidRigging;
4334        assert_eq!(bid_rigging.subcategory(), "bribery");
4335        assert_eq!(
4336            bid_rigging.detection_difficulty(),
4337            AnomalyDetectionDifficulty::Hard
4338        );
4339
4340        let purchasing = CorruptionScheme::PurchasingConflict;
4341        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4342
4343        // Test all variants exist
4344        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4345    }
4346
4347    #[test]
4348    fn test_financial_statement_scheme() {
4349        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4350        assert_eq!(
4351            fictitious.category(),
4352            AcfeFraudCategory::FinancialStatementFraud
4353        );
4354        assert_eq!(fictitious.subcategory(), "overstatement");
4355        assert_eq!(fictitious.severity(), 5);
4356        assert_eq!(
4357            fictitious.detection_difficulty(),
4358            AnomalyDetectionDifficulty::Expert
4359        );
4360
4361        let understated = FinancialStatementScheme::UnderstatedRevenues;
4362        assert_eq!(understated.subcategory(), "understatement");
4363
4364        // Test all variants exist
4365        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4366    }
4367
4368    #[test]
4369    fn test_acfe_scheme_unified() {
4370        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4371        assert_eq!(
4372            cash_scheme.category(),
4373            AcfeFraudCategory::AssetMisappropriation
4374        );
4375        assert_eq!(cash_scheme.severity(), 5);
4376
4377        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4378        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4379
4380        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4381        assert_eq!(
4382            fs_scheme.category(),
4383            AcfeFraudCategory::FinancialStatementFraud
4384        );
4385    }
4386
4387    #[test]
4388    fn test_acfe_detection_method() {
4389        let tip = AcfeDetectionMethod::Tip;
4390        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4391
4392        let internal_audit = AcfeDetectionMethod::InternalAudit;
4393        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4394
4395        let external_audit = AcfeDetectionMethod::ExternalAudit;
4396        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4397
4398        // Test all variants exist
4399        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4400    }
4401
4402    #[test]
4403    fn test_perpetrator_department() {
4404        let accounting = PerpetratorDepartment::Accounting;
4405        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4406        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4407
4408        let executive = PerpetratorDepartment::Executive;
4409        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4410    }
4411
4412    #[test]
4413    fn test_perpetrator_level() {
4414        let employee = PerpetratorLevel::Employee;
4415        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4416        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4417
4418        let exec = PerpetratorLevel::OwnerExecutive;
4419        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4420    }
4421
4422    #[test]
4423    fn test_acfe_calibration() {
4424        let cal = AcfeCalibration::default();
4425        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4426        assert_eq!(cal.median_duration_months, 12);
4427        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4428        assert!(cal.validate().is_ok());
4429
4430        // Test custom calibration
4431        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4432        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4433        assert_eq!(custom_cal.median_duration_months, 18);
4434
4435        // Test validation failure
4436        let bad_cal = AcfeCalibration {
4437            collusion_rate: 1.5,
4438            ..Default::default()
4439        };
4440        assert!(bad_cal.validate().is_err());
4441    }
4442
4443    #[test]
4444    fn test_fraud_triangle() {
4445        let triangle = FraudTriangle::new(
4446            PressureType::FinancialTargets,
4447            vec![
4448                OpportunityFactor::WeakInternalControls,
4449                OpportunityFactor::ManagementOverride,
4450            ],
4451            Rationalization::ForTheCompanyGood,
4452        );
4453
4454        // Risk score should be between 0 and 1
4455        let risk = triangle.risk_score();
4456        assert!((0.0..=1.0).contains(&risk));
4457        // Should be relatively high given the components
4458        assert!(risk > 0.5);
4459    }
4460
4461    #[test]
4462    fn test_pressure_types() {
4463        let financial = PressureType::FinancialTargets;
4464        assert!(financial.risk_weight() > 0.5);
4465
4466        let gambling = PressureType::GamblingAddiction;
4467        assert_eq!(gambling.risk_weight(), 0.90);
4468    }
4469
4470    #[test]
4471    fn test_opportunity_factors() {
4472        let override_factor = OpportunityFactor::ManagementOverride;
4473        assert_eq!(override_factor.risk_weight(), 0.90);
4474
4475        let weak_controls = OpportunityFactor::WeakInternalControls;
4476        assert!(weak_controls.risk_weight() > 0.8);
4477    }
4478
4479    #[test]
4480    fn test_rationalizations() {
4481        let entitlement = Rationalization::Entitlement;
4482        assert!(entitlement.risk_weight() > 0.8);
4483
4484        let borrowing = Rationalization::TemporaryBorrowing;
4485        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4486    }
4487
4488    #[test]
4489    fn test_acfe_scheme_serialization() {
4490        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4491        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4492        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4493        assert_eq!(scheme, deserialized);
4494
4495        let calibration = AcfeCalibration::default();
4496        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4497        let deserialized: AcfeCalibration =
4498            serde_json::from_str(&json).expect("Failed to deserialize");
4499        assert_eq!(calibration.median_loss, deserialized.median_loss);
4500    }
4501}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs