datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// Causal reason explaining why an anomaly was injected.
///
/// Recording the reason alongside each anomaly gives provenance: a consumer can tell
/// *why* a record was selected for injection, not merely that it was.
///
/// `Eq` and `Hash` are not derived because [`AnomalyCausalReason::RandomRate`] carries an
/// `f64`, which implements neither.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnomalyCausalReason {
    /// Injected due to random rate selection.
    RandomRate {
        /// Base rate used for selection.
        base_rate: f64,
    },
    /// Injected because a temporal pattern matched.
    TemporalPattern {
        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
        pattern_name: String,
    },
    /// Injected based on entity targeting rules.
    EntityTargeting {
        /// Type of entity targeted (e.g., "vendor", "user", "account").
        target_type: String,
        /// ID of the targeted entity.
        target_id: String,
    },
    /// Injected as a member of an anomaly cluster.
    ClusterMembership {
        /// ID of the cluster this anomaly belongs to.
        cluster_id: String,
    },
    /// Injected as one step of a multi-step scenario.
    ScenarioStep {
        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
        scenario_type: String,
        /// Step number within the scenario.
        step_number: u32,
    },
    /// Injected based on a data quality profile.
    DataQualityProfile {
        /// Profile name (e.g., "noisy", "legacy", "clean").
        profile: String,
    },
    /// Injected to balance classes for ML training.
    MLTrainingBalance {
        /// Target class being balanced.
        target_class: String,
    },
}
60
/// Structured injection strategy with captured parameters.
///
/// Unlike the string-based `injection_strategy` field, this enum captures
/// the exact parameters used during injection for full reproducibility.
///
/// `Eq` and `Hash` are not derived because [`InjectionStrategy::AmountManipulation`]
/// carries an `f64` factor.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InjectionStrategy {
    /// Amount was manipulated by a multiplicative factor.
    AmountManipulation {
        /// Original amount before manipulation.
        original: Decimal,
        /// Multiplication factor applied.
        factor: f64,
    },
    /// Amount adjusted to avoid a threshold.
    ThresholdAvoidance {
        /// Threshold being avoided.
        threshold: Decimal,
        /// Final amount after adjustment.
        adjusted_amount: Decimal,
    },
    /// Date was backdated or forward-dated.
    DateShift {
        /// Number of days shifted; a negative value means the date was backdated,
        /// anything else is treated as forward-dated.
        days_shifted: i32,
        /// Original date before the shift.
        original_date: NaiveDate,
    },
    /// User approved their own transaction.
    SelfApproval {
        /// User who both created and approved the transaction.
        user_id: String,
    },
    /// Segregation of duties violation.
    SoDViolation {
        /// First duty involved.
        duty1: String,
        /// Second duty involved.
        duty2: String,
        /// User who performed both duties.
        violating_user: String,
    },
    /// Exact duplicate of another document.
    ExactDuplicate {
        /// ID of the original document.
        original_doc_id: String,
    },
    /// Near-duplicate with small variations.
    NearDuplicate {
        /// ID of the original document.
        original_doc_id: String,
        /// Names of the fields that were varied.
        varied_fields: Vec<String>,
    },
    /// Circular flow of funds/goods.
    CircularFlow {
        /// Chain of entities involved.
        entity_chain: Vec<String>,
    },
    /// Transaction split to avoid a threshold.
    SplitTransaction {
        /// Original total amount.
        original_amount: Decimal,
        /// Number of splits.
        split_count: u32,
        /// IDs of the split documents.
        split_doc_ids: Vec<String>,
    },
    /// Round number manipulation.
    RoundNumbering {
        /// Original precise amount.
        original_amount: Decimal,
        /// Rounded amount.
        rounded_amount: Decimal,
    },
    /// Timing manipulation (weekend, after-hours, etc.).
    TimingManipulation {
        /// Type of timing issue.
        timing_type: String,
        /// Original timestamp, when one was captured.
        original_time: Option<NaiveDateTime>,
    },
    /// Account misclassification.
    AccountMisclassification {
        /// Correct account.
        correct_account: String,
        /// Incorrect account used.
        incorrect_account: String,
    },
    /// Missing required field.
    MissingField {
        /// Name of the missing field.
        field_name: String,
    },
    /// Custom injection strategy not covered by the structured variants.
    Custom {
        /// Strategy name.
        name: String,
        /// Additional free-form parameters, keyed by parameter name.
        parameters: HashMap<String, String>,
    },
}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
/// Primary anomaly classification.
///
/// Each structured variant wraps the specific sub-type enum for its family;
/// [`AnomalyType::Custom`] carries a free-form name for anything outside those families.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyType {
    /// Fraudulent activity.
    Fraud(FraudType),
    /// Data entry or processing error.
    Error(ErrorType),
    /// Process or control issue.
    ProcessIssue(ProcessIssueType),
    /// Statistical anomaly.
    Statistical(StatisticalAnomalyType),
    /// Relational/graph anomaly.
    Relational(RelationalAnomalyType),
    /// Custom anomaly type, identified only by its name.
    Custom(String),
}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297
298    /// Returns the *in-principle observability class* — which detection arm can, in principle,
299    /// observe this anomaly family. Delegates to the specific sub-type. See [`ObservabilityClass`].
300    pub fn observability_class(&self) -> ObservabilityClass {
301        match self {
302            AnomalyType::Fraud(t) => t.observability_class(),
303            AnomalyType::Error(t) => t.observability_class(),
304            AnomalyType::ProcessIssue(t) => t.observability_class(),
305            AnomalyType::Statistical(t) => t.observability_class(),
306            AnomalyType::Relational(t) => t.observability_class(),
307            // Custom anomalies have no declared structure; assume a per-entry signature.
308            AnomalyType::Custom(_) => ObservabilityClass::PerJeDensity,
309        }
310    }
311}
312
/// In-principle *observability class* of an anomaly — which detection arm can, in principle,
/// observe it. Grounds the routing/observability thesis (FINDINGS §12): different anomaly families
/// surface to different signals, so a fair ML benchmark should report detection against the ceiling
/// of the arm that *can* see each family, not a single pooled score. Assigned at injection time
/// from the anomaly type via [`AnomalyType::observability_class`]; it is the *primary* observability
/// (the arm most likely to surface the family first), not an exclusive claim.
///
/// Variants serialize in `snake_case` (e.g. `per_je_density`), and
/// [`ObservabilityClass::PerJeDensity`] is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum ObservabilityClass {
    /// Visible from a single journal entry's own attributes — amount, round-dollar / threshold
    /// signatures, weekend / off-hours / post-close timing flags, account classification, balance.
    /// The per-JE *density* residual observes this family.
    #[default]
    PerJeDensity,
    /// Visible only in the cross-entry *account-flow graph* — unusual account pairs, circular
    /// flows, counterparty nodes, network centrality, intercompany structure. The relational
    /// account-flow residual observes this family.
    RelationalGraph,
    /// Visible only in the cross-period *time series* — trend breaks, level shifts, seasonality,
    /// period-timing (wrong-period, backdating, cutoff), frequency bursts. A temporal arm observes
    /// this family.
    Temporal,
    /// Visible only with cross-period / cross-entity *memory* of prior occurrences — duplicate
    /// payments, recurring ghost employees, repeated shell-company payments. Needs audit-doctrine
    /// carry-forward (FINDINGS §40-41), not a single period's manifold.
    MemoryOnly,
}
340
341impl ObservabilityClass {
342    /// Stable snake_case label (matches the serialized form).
343    pub fn as_str(&self) -> &'static str {
344        match self {
345            ObservabilityClass::PerJeDensity => "per_je_density",
346            ObservabilityClass::RelationalGraph => "relational_graph",
347            ObservabilityClass::Temporal => "temporal",
348            ObservabilityClass::MemoryOnly => "memory_only",
349        }
350    }
351}
352
/// Fraud types for detection training.
///
/// A fieldless, `Copy` enum. Several variants are documented as aliases of one another
/// (e.g. `Kickback` / `KickbackScheme`); they are nevertheless distinct variants and compare
/// unequal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FraudType {
    // Journal Entry Fraud
    /// Fictitious journal entry with no business purpose.
    FictitiousEntry,
    /// Fictitious transaction (documented as an alias for `FictitiousEntry`, but a distinct variant).
    FictitiousTransaction,
    /// Round-dollar amounts suggesting manual manipulation.
    RoundDollarManipulation,
    /// Entry posted just below approval threshold.
    JustBelowThreshold,
    /// Revenue recognition manipulation.
    RevenueManipulation,
    /// Expense capitalization fraud.
    ImproperCapitalization,
    /// Improperly capitalizing expenses as assets.
    ExpenseCapitalization,
    /// Cookie jar reserves manipulation.
    ReserveManipulation,
    /// Round-tripping funds through suspense/clearing accounts.
    SuspenseAccountAbuse,
    /// Splitting transactions to stay below approval thresholds.
    SplitTransaction,
    /// Unusual timing (weekend, holiday, after-hours postings).
    TimingAnomaly,
    /// Posting to unauthorized accounts.
    UnauthorizedAccess,

    // Approval Fraud
    /// User approving their own request.
    SelfApproval,
    /// Approval beyond authorized limit.
    ExceededApprovalLimit,
    /// Segregation of duties violation.
    SegregationOfDutiesViolation,
    /// Approval by unauthorized user.
    UnauthorizedApproval,
    /// Collusion between approver and requester.
    CollusiveApproval,

    // Vendor/Payment Fraud
    /// Fictitious vendor.
    FictitiousVendor,
    /// Duplicate payment to vendor.
    DuplicatePayment,
    /// Payment to shell company.
    ShellCompanyPayment,
    /// Kickback scheme.
    Kickback,
    /// Kickback scheme (documented as an alias for `Kickback`, but a distinct variant).
    KickbackScheme,
    /// Unauthorized customer/vendor discount (sweethearting, side deals).
    UnauthorizedDiscount,
    /// Round-tripping funds through multiple entities or accounts to
    /// inflate apparent activity or obscure origin.
    RoundTripping,
    /// Invoice manipulation.
    InvoiceManipulation,

    // Asset Fraud
    /// Misappropriation of assets.
    AssetMisappropriation,
    /// Inventory theft.
    InventoryTheft,
    /// Ghost employee.
    GhostEmployee,

    // Financial Statement Fraud
    /// Premature revenue recognition.
    PrematureRevenue,
    /// Understated liabilities.
    UnderstatedLiabilities,
    /// Overstated assets.
    OverstatedAssets,
    /// Channel stuffing.
    ChannelStuffing,

    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
    /// Improper revenue recognition timing (ASC 606/IFRS 15).
    ImproperRevenueRecognition,
    /// Multiple performance obligations not properly separated.
    ImproperPoAllocation,
    /// Variable consideration not properly estimated.
    VariableConsiderationManipulation,
    /// Contract modifications not properly accounted for.
    ContractModificationMisstatement,

    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
    /// Lease classification manipulation (operating vs finance).
    LeaseClassificationManipulation,
    /// Off-balance sheet lease fraud.
    OffBalanceSheetLease,
    /// Lease liability understatement.
    LeaseLiabilityUnderstatement,
    /// Right-of-use (ROU) asset misstatement.
    RouAssetMisstatement,

    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
    /// Fair value hierarchy misclassification.
    FairValueHierarchyManipulation,
    /// Level 3 input manipulation.
    Level3InputManipulation,
    /// Valuation technique manipulation.
    ValuationTechniqueManipulation,

    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
    /// Delayed impairment recognition.
    DelayedImpairment,
    /// Improperly avoiding impairment testing.
    ImpairmentTestAvoidance,
    /// Cash flow projection manipulation for impairment.
    CashFlowProjectionManipulation,
    /// Improper impairment reversal (IFRS only).
    ImproperImpairmentReversal,

    // Sourcing/Procurement Fraud (S2C)
    /// Bid rigging or collusion among bidders.
    BidRigging,
    /// Contracts with phantom/shell vendors.
    PhantomVendorContract,
    /// Splitting contracts to avoid approval thresholds.
    SplitContractThreshold,
    /// Conflict of interest in sourcing decisions.
    ConflictOfInterestSourcing,

    // HR/Payroll Fraud (H2R)
    /// Ghost employee on payroll.
    GhostEmployeePayroll,
    /// Payroll inflation/unauthorized raises.
    PayrollInflation,
    /// Duplicate expense report submission.
    DuplicateExpenseReport,
    /// Fictitious expense claims.
    FictitiousExpense,
    /// Splitting expenses to avoid approval threshold.
    SplitExpenseToAvoidApproval,

    // O2C Fraud
    /// Revenue timing manipulation via quotes.
    RevenueTimingManipulation,
    /// Overriding quote prices without authorization.
    QuotePriceOverride,
}
497
498impl FraudType {
499    /// Returns severity level (1-5).
500    pub fn severity(&self) -> u8 {
501        match self {
502            FraudType::RoundDollarManipulation => 2,
503            FraudType::JustBelowThreshold => 3,
504            FraudType::SelfApproval => 3,
505            FraudType::ExceededApprovalLimit => 3,
506            FraudType::DuplicatePayment => 3,
507            FraudType::FictitiousEntry => 4,
508            FraudType::RevenueManipulation => 5,
509            FraudType::FictitiousVendor => 5,
510            FraudType::ShellCompanyPayment => 5,
511            FraudType::AssetMisappropriation => 5,
512            FraudType::SegregationOfDutiesViolation => 4,
513            FraudType::CollusiveApproval => 5,
514            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
515            FraudType::ImproperRevenueRecognition => 5,
516            FraudType::ImproperPoAllocation => 4,
517            FraudType::VariableConsiderationManipulation => 4,
518            FraudType::ContractModificationMisstatement => 3,
519            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
520            FraudType::LeaseClassificationManipulation => 4,
521            FraudType::OffBalanceSheetLease => 5,
522            FraudType::LeaseLiabilityUnderstatement => 4,
523            FraudType::RouAssetMisstatement => 3,
524            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
525            FraudType::FairValueHierarchyManipulation => 4,
526            FraudType::Level3InputManipulation => 5,
527            FraudType::ValuationTechniqueManipulation => 4,
528            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
529            FraudType::DelayedImpairment => 4,
530            FraudType::ImpairmentTestAvoidance => 4,
531            FraudType::CashFlowProjectionManipulation => 5,
532            FraudType::ImproperImpairmentReversal => 3,
533            _ => 4,
534        }
535    }
536
537    /// Primary observability class. See [`ObservabilityClass`].
538    pub fn observability_class(&self) -> ObservabilityClass {
539        use ObservabilityClass::*;
540        match self {
541            // Multi-entity flow / counterparty / network structure → relational account-flow graph.
542            FraudType::RoundTripping
543            | FraudType::SuspenseAccountAbuse
544            | FraudType::ShellCompanyPayment
545            | FraudType::Kickback
546            | FraudType::KickbackScheme
547            | FraudType::CollusiveApproval
548            | FraudType::FictitiousVendor
549            | FraudType::PhantomVendorContract
550            | FraudType::BidRigging
551            | FraudType::ConflictOfInterestSourcing => RelationalGraph,
552            // Recurrence / cross-period / cross-entity duplication → needs carry-forward memory.
553            FraudType::DuplicatePayment
554            | FraudType::DuplicateExpenseReport
555            | FraudType::GhostEmployee
556            | FraudType::GhostEmployeePayroll => MemoryOnly,
557            // Period-timing / trend manipulation → needs the time series.
558            FraudType::PrematureRevenue
559            | FraudType::ChannelStuffing
560            | FraudType::RevenueTimingManipulation
561            | FraudType::DelayedImpairment
562            | FraudType::ImpairmentTestAvoidance => Temporal,
563            // Everything else carries a single-entry forensic signature (amount / threshold /
564            // timing-attribute / classification / approval) → per-JE density.
565            _ => PerJeDensity,
566        }
567    }
568}
569
/// Error types for error detection.
///
/// A fieldless, `Copy` enum covering unintentional mistakes, grouped below by origin
/// (data entry, timing, classification, calculation, accounting-standards application).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorType {
    // Data Entry Errors
    /// Duplicate document entry.
    DuplicateEntry,
    /// Reversed debit/credit amounts.
    ReversedAmount,
    /// Transposed digits in amount.
    TransposedDigits,
    /// Wrong decimal placement.
    DecimalError,
    /// Missing required field.
    MissingField,
    /// Invalid account code.
    InvalidAccount,

    // Timing Errors
    /// Posted to wrong period.
    WrongPeriod,
    /// Backdated entry.
    BackdatedEntry,
    /// Future-dated entry.
    FutureDatedEntry,
    /// Cutoff error.
    CutoffError,

    // Classification Errors
    /// Wrong account classification.
    MisclassifiedAccount,
    /// Wrong cost center.
    WrongCostCenter,
    /// Wrong company code.
    WrongCompanyCode,

    // Calculation Errors
    /// Unbalanced journal entry.
    UnbalancedEntry,
    /// Rounding error.
    RoundingError,
    /// Currency conversion error.
    CurrencyError,
    /// Tax calculation error.
    TaxCalculationError,

    // Accounting Standards Errors (Non-Fraudulent)
    /// Wrong revenue recognition timing (honest mistake).
    RevenueTimingError,
    /// Performance obligation allocation error.
    PoAllocationError,
    /// Lease classification error (operating vs finance).
    LeaseClassificationError,
    /// Lease calculation error (present value, amortization).
    LeaseCalculationError,
    /// Fair value measurement error.
    FairValueError,
    /// Impairment calculation error.
    ImpairmentCalculationError,
    /// Discount rate error.
    DiscountRateError,
    /// Framework application error (IFRS vs GAAP).
    FrameworkApplicationError,
}
633
634impl ErrorType {
635    /// Returns severity level (1-5).
636    pub fn severity(&self) -> u8 {
637        match self {
638            ErrorType::RoundingError => 1,
639            ErrorType::MissingField => 2,
640            ErrorType::TransposedDigits => 2,
641            ErrorType::DecimalError => 3,
642            ErrorType::DuplicateEntry => 3,
643            ErrorType::ReversedAmount => 3,
644            ErrorType::WrongPeriod => 4,
645            ErrorType::UnbalancedEntry => 5,
646            ErrorType::CurrencyError => 4,
647            // Accounting Standards Errors
648            ErrorType::RevenueTimingError => 4,
649            ErrorType::PoAllocationError => 3,
650            ErrorType::LeaseClassificationError => 3,
651            ErrorType::LeaseCalculationError => 3,
652            ErrorType::FairValueError => 4,
653            ErrorType::ImpairmentCalculationError => 4,
654            ErrorType::DiscountRateError => 3,
655            ErrorType::FrameworkApplicationError => 4,
656            _ => 3,
657        }
658    }
659
660    /// Primary observability class. See [`ObservabilityClass`].
661    pub fn observability_class(&self) -> ObservabilityClass {
662        use ObservabilityClass::*;
663        match self {
664            // Period-timing errors → temporal time series.
665            ErrorType::WrongPeriod
666            | ErrorType::BackdatedEntry
667            | ErrorType::FutureDatedEntry
668            | ErrorType::CutoffError => Temporal,
669            // A duplicated document is only suspicious relative to its prior occurrence → memory.
670            ErrorType::DuplicateEntry => MemoryOnly,
671            // Amount / account / balance / calculation errors live in the single entry → density.
672            _ => PerJeDensity,
673        }
674    }
675}
676
/// Process issue types.
///
/// A fieldless, `Copy` enum of process and control weaknesses, grouped below by area
/// (approval, timing, control, documentation, sourcing, order-to-cash).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProcessIssueType {
    // Approval Issues
    /// Approval skipped entirely.
    SkippedApproval,
    /// Late approval (after posting).
    LateApproval,
    /// Missing supporting documentation.
    MissingDocumentation,
    /// Incomplete approval chain.
    IncompleteApprovalChain,

    // Timing Issues
    /// Late posting.
    LatePosting,
    /// Posting outside business hours.
    AfterHoursPosting,
    /// Weekend/holiday posting.
    WeekendPosting,
    /// Rushed period-end posting.
    RushedPeriodEnd,
    /// Entry posted after the period-end close date (ISA 240.32).
    /// Distinct from `RushedPeriodEnd`, which flags pre-close volume spikes —
    /// this variant specifically marks post-close adjustments.
    PostClosePosting,

    // Control Issues
    /// Manual override of system control.
    ManualOverride,
    /// Unusual user access pattern.
    UnusualAccess,
    /// System bypass.
    SystemBypass,
    /// Batch processing anomaly.
    BatchAnomaly,

    // Documentation Issues
    /// Vague or missing description.
    VagueDescription,
    /// Changed after posting.
    PostFactoChange,
    /// Incomplete audit trail.
    IncompleteAuditTrail,

    // Sourcing/Procurement Issues (S2C)
    /// Purchasing outside of contracts (maverick spend).
    MaverickSpend,
    /// Purchasing against an expired contract.
    ExpiredContractPurchase,
    /// Overriding contracted price without authorization.
    ContractPriceOverride,
    /// Award given with only a single bid received.
    SingleBidAward,
    /// Bypassing supplier qualification requirements.
    QualificationBypass,

    // O2C Issues
    /// Converting an expired quote to a sales order.
    ExpiredQuoteConversion,
}
738
739impl ProcessIssueType {
740    /// Returns severity level (1-5).
741    pub fn severity(&self) -> u8 {
742        match self {
743            ProcessIssueType::VagueDescription => 1,
744            ProcessIssueType::LatePosting => 2,
745            ProcessIssueType::AfterHoursPosting => 2,
746            ProcessIssueType::WeekendPosting => 2,
747            ProcessIssueType::PostClosePosting => 4,
748            ProcessIssueType::SkippedApproval => 4,
749            ProcessIssueType::ManualOverride => 4,
750            ProcessIssueType::SystemBypass => 5,
751            ProcessIssueType::IncompleteAuditTrail => 4,
752            _ => 3,
753        }
754    }
755
756    /// Primary observability class. See [`ObservabilityClass`].
757    pub fn observability_class(&self) -> ObservabilityClass {
758        use ObservabilityClass::*;
759        match self {
760            // Lag between event and posting / period-end crunch → temporal time series.
761            ProcessIssueType::LatePosting
762            | ProcessIssueType::LateApproval
763            | ProcessIssueType::RushedPeriodEnd => Temporal,
764            // The rest are single-entry attribute/control checks — weekend/off-hours/post-close
765            // timing flags, skipped-approval, override, vague description → per-JE density.
766            _ => PerJeDensity,
767        }
768    }
769}
770
/// Statistical anomaly types.
///
/// A fieldless, `Copy` enum, grouped below by the kind of statistic that deviates
/// (amount, frequency, trend, distribution, plus domain-specific groups).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatisticalAnomalyType {
    // Amount Anomalies
    /// Amount significantly above normal.
    UnusuallyHighAmount,
    /// Amount significantly below normal.
    UnusuallyLowAmount,
    /// Violates Benford's Law distribution.
    BenfordViolation,
    /// Exact duplicate amount (suspicious).
    ExactDuplicateAmount,
    /// Repeating pattern in amounts.
    RepeatingAmount,

    // Frequency Anomalies
    /// Unusual transaction frequency.
    UnusualFrequency,
    /// Burst of transactions.
    TransactionBurst,
    /// Unusual time of day.
    UnusualTiming,

    // Trend Anomalies
    /// Break in historical trend.
    TrendBreak,
    /// Sudden level shift.
    LevelShift,
    /// Seasonal pattern violation.
    SeasonalAnomaly,

    // Distribution Anomalies
    /// Outlier in distribution.
    StatisticalOutlier,
    /// Change in variance.
    VarianceChange,
    /// Distribution shift.
    DistributionShift,

    // Sourcing/Contract Anomalies
    /// Pattern of SLA breaches from a vendor.
    SlaBreachPattern,
    /// Contract with zero utilization.
    UnusedContract,

    // HR/Payroll Anomalies
    /// Anomalous overtime patterns.
    OvertimeAnomaly,

    // Heavy-tail Anomalies (v5.30 B2 / #154)
    /// Journal entry with hundreds of lines touching bridge accounts —
    /// models real consolidation entries, period-end accruals, or
    /// manual reclasses. Lifts the synthetic p99 / max
    /// relational_score percentiles into the band the reference
    /// shard exhibits (20× vs synth's normal-mode 12×). Opt-in via
    /// `anomaly_injection.consolidation_outlier_rate` (default 0.0,
    /// which preserves v5.29 byte-identical output for configs that
    /// don't opt in).
    ConsolidationOutlier,
}
831
832impl StatisticalAnomalyType {
833    /// Returns severity level (1-5).
834    pub fn severity(&self) -> u8 {
835        match self {
836            StatisticalAnomalyType::UnusualTiming => 1,
837            StatisticalAnomalyType::UnusualFrequency => 2,
838            StatisticalAnomalyType::BenfordViolation => 2,
839            StatisticalAnomalyType::UnusuallyHighAmount => 3,
840            StatisticalAnomalyType::TrendBreak => 3,
841            StatisticalAnomalyType::TransactionBurst => 4,
842            StatisticalAnomalyType::ExactDuplicateAmount => 3,
843            // v5.30 B2 — multi-100-line bridge-account postings are
844            // among the highest-magnitude single-event anomalies the
845            // engine emits; rate them at 4 alongside TransactionBurst.
846            StatisticalAnomalyType::ConsolidationOutlier => 4,
847            _ => 3,
848        }
849    }
850
851    /// Primary observability class. See [`ObservabilityClass`].
852    pub fn observability_class(&self) -> ObservabilityClass {
853        use ObservabilityClass::*;
854        match self {
855            // Frequency / trend / variance over time → temporal time series.
856            StatisticalAnomalyType::UnusualFrequency
857            | StatisticalAnomalyType::TransactionBurst
858            | StatisticalAnomalyType::TrendBreak
859            | StatisticalAnomalyType::LevelShift
860            | StatisticalAnomalyType::SeasonalAnomaly
861            | StatisticalAnomalyType::VarianceChange
862            | StatisticalAnomalyType::DistributionShift
863            | StatisticalAnomalyType::RepeatingAmount
864            | StatisticalAnomalyType::SlaBreachPattern
865            | StatisticalAnomalyType::OvertimeAnomaly => Temporal,
866            // Recurrence / utilization relative to prior state → carry-forward memory.
867            StatisticalAnomalyType::ExactDuplicateAmount
868            | StatisticalAnomalyType::UnusedContract => MemoryOnly,
869            // Multi-100-line bridge-account posting lifts the relational_score percentiles.
870            StatisticalAnomalyType::ConsolidationOutlier => RelationalGraph,
871            // Single-entry amount / distribution outliers (incl. Benford) → per-JE density.
872            _ => PerJeDensity,
873        }
874    }
875}
876
877/// Relational/graph anomaly types.
878#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
879pub enum RelationalAnomalyType {
880    // Transaction Pattern Anomalies
881    /// Circular transaction pattern.
882    CircularTransaction,
883    /// Unusual account combination.
884    UnusualAccountPair,
885    /// New trading partner.
886    NewCounterparty,
887    /// Dormant account suddenly active.
888    DormantAccountActivity,
889
890    // Network Anomalies
891    /// Unusual network centrality.
892    CentralityAnomaly,
893    /// Isolated transaction cluster.
894    IsolatedCluster,
895    /// Bridge node anomaly.
896    BridgeNodeAnomaly,
897    /// Community structure change.
898    CommunityAnomaly,
899
900    // Relationship Anomalies
901    /// Missing expected relationship.
902    MissingRelationship,
903    /// Unexpected relationship.
904    UnexpectedRelationship,
905    /// Relationship strength change.
906    RelationshipStrengthChange,
907
908    // Intercompany Anomalies
909    /// Unmatched intercompany transaction.
910    UnmatchedIntercompany,
911    /// Circular intercompany flow.
912    CircularIntercompany,
913    /// Transfer pricing anomaly.
914    TransferPricingAnomaly,
915
916    // Source-conditional anomalies (SOTA-12, #140)
917    /// JE uses a `(source, account-pair)` combination that is rare under the
918    /// per-source marginal P(account | source) — the single dominant explainer
919    /// for audit-packet top JEs (FINDINGS §13). Selected by the orchestrator's
920    /// anomaly-injection post-process from the generated JE set.
921    SourceConditionalRarity,
922}
923
924impl RelationalAnomalyType {
925    /// Returns severity level (1-5).
926    pub fn severity(&self) -> u8 {
927        match self {
928            RelationalAnomalyType::NewCounterparty => 1,
929            RelationalAnomalyType::DormantAccountActivity => 2,
930            RelationalAnomalyType::UnusualAccountPair => 2,
931            RelationalAnomalyType::CircularTransaction => 4,
932            RelationalAnomalyType::CircularIntercompany => 4,
933            RelationalAnomalyType::TransferPricingAnomaly => 4,
934            RelationalAnomalyType::UnmatchedIntercompany => 3,
935            RelationalAnomalyType::SourceConditionalRarity => 2,
936            _ => 3,
937        }
938    }
939
940    /// Primary observability class — all relational/graph anomalies surface in the account-flow
941    /// graph manifold. See [`ObservabilityClass`].
942    pub fn observability_class(&self) -> ObservabilityClass {
943        ObservabilityClass::RelationalGraph
944    }
945}
946
947/// A labeled anomaly for supervised learning.
948#[derive(Debug, Clone, Serialize, Deserialize)]
949pub struct LabeledAnomaly {
950    /// Unique anomaly identifier.
951    pub anomaly_id: String,
952    /// Type of anomaly.
953    pub anomaly_type: AnomalyType,
954    /// Document or entity that contains the anomaly.
955    pub document_id: String,
956    /// Document type (JE, PO, Invoice, etc.).
957    pub document_type: String,
958    /// Company code.
959    pub company_code: String,
960    /// Date the anomaly occurred.
961    pub anomaly_date: NaiveDate,
962    /// Timestamp when detected/injected.
963    #[serde(with = "crate::serde_timestamp::naive")]
964    pub detection_timestamp: NaiveDateTime,
965    /// Confidence score (0.0 - 1.0) for injected anomalies.
966    pub confidence: f64,
967    /// Severity (1-5).
968    pub severity: u8,
969    /// In-principle observability class — which detection arm can, in principle, surface this
970    /// family (per-JE density / relational graph / temporal / memory-only). Derived from the
971    /// anomaly type at injection time; lets downstream ML eval report against the right ceiling
972    /// per signal rather than a single pooled score. See [`ObservabilityClass`].
973    #[serde(default)]
974    pub observability: ObservabilityClass,
975    /// Description of the anomaly.
976    pub description: String,
977    /// Related entities (user IDs, account codes, etc.).
978    pub related_entities: Vec<String>,
979    /// Monetary impact if applicable.
980    pub monetary_impact: Option<Decimal>,
981    /// Additional metadata.
982    pub metadata: HashMap<String, String>,
983    /// Whether this was injected (true) or naturally occurring (false).
984    pub is_injected: bool,
985    /// Injection strategy used (if injected) - legacy string field.
986    pub injection_strategy: Option<String>,
987    /// Cluster ID if part of an anomaly cluster.
988    pub cluster_id: Option<String>,
989
990    // ========================================
991    // PROVENANCE TRACKING FIELDS (Phase 1.2)
992    // ========================================
993    /// Hash of the original document before modification.
994    /// Enables tracking what the document looked like pre-injection.
995    #[serde(default, skip_serializing_if = "Option::is_none")]
996    pub original_document_hash: Option<String>,
997
998    /// Causal reason explaining why this anomaly was injected.
999    /// Provides "why" tracking for each anomaly.
1000    #[serde(default, skip_serializing_if = "Option::is_none")]
1001    pub causal_reason: Option<AnomalyCausalReason>,
1002
1003    /// Structured injection strategy with parameters.
1004    /// More detailed than the legacy string-based injection_strategy field.
1005    #[serde(default, skip_serializing_if = "Option::is_none")]
1006    pub structured_strategy: Option<InjectionStrategy>,
1007
1008    /// Parent anomaly ID if this was derived from another anomaly.
1009    /// Enables anomaly transformation chains.
1010    #[serde(default, skip_serializing_if = "Option::is_none")]
1011    pub parent_anomaly_id: Option<String>,
1012
1013    /// Child anomaly IDs that were derived from this anomaly.
1014    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1015    pub child_anomaly_ids: Vec<String>,
1016
1017    /// Scenario ID if this anomaly is part of a multi-step scenario.
1018    #[serde(default, skip_serializing_if = "Option::is_none")]
1019    pub scenario_id: Option<String>,
1020
1021    /// Generation run ID that produced this anomaly.
1022    /// Enables tracing anomalies back to their generation run.
1023    #[serde(default, skip_serializing_if = "Option::is_none")]
1024    pub run_id: Option<String>,
1025
1026    /// Seed used for RNG during generation.
1027    /// Enables reproducibility.
1028    #[serde(default, skip_serializing_if = "Option::is_none")]
1029    pub generation_seed: Option<u64>,
1030}
1031
1032impl LabeledAnomaly {
1033    /// Creates a new labeled anomaly.
1034    pub fn new(
1035        anomaly_id: String,
1036        anomaly_type: AnomalyType,
1037        document_id: String,
1038        document_type: String,
1039        company_code: String,
1040        anomaly_date: NaiveDate,
1041    ) -> Self {
1042        let severity = anomaly_type.severity();
1043        let observability = anomaly_type.observability_class();
1044        let description = format!(
1045            "{} - {} in document {}",
1046            anomaly_type.category(),
1047            anomaly_type.type_name(),
1048            document_id
1049        );
1050
1051        Self {
1052            anomaly_id,
1053            anomaly_type,
1054            document_id,
1055            document_type,
1056            company_code,
1057            anomaly_date,
1058            detection_timestamp: chrono::Local::now().naive_local(),
1059            confidence: 1.0,
1060            severity,
1061            observability,
1062            description,
1063            related_entities: Vec::new(),
1064            monetary_impact: None,
1065            metadata: HashMap::new(),
1066            is_injected: true,
1067            injection_strategy: None,
1068            cluster_id: None,
1069            // Provenance fields
1070            original_document_hash: None,
1071            causal_reason: None,
1072            structured_strategy: None,
1073            parent_anomaly_id: None,
1074            child_anomaly_ids: Vec::new(),
1075            scenario_id: None,
1076            run_id: None,
1077            generation_seed: None,
1078        }
1079    }
1080
1081    /// Sets the description.
1082    pub fn with_description(mut self, description: &str) -> Self {
1083        self.description = description.to_string();
1084        self
1085    }
1086
1087    /// Sets the monetary impact.
1088    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
1089        self.monetary_impact = Some(impact);
1090        self
1091    }
1092
1093    /// Adds a related entity.
1094    pub fn with_related_entity(mut self, entity: &str) -> Self {
1095        self.related_entities.push(entity.to_string());
1096        self
1097    }
1098
1099    /// Adds metadata.
1100    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
1101        self.metadata.insert(key.to_string(), value.to_string());
1102        self
1103    }
1104
1105    /// Sets the injection strategy (legacy string).
1106    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
1107        self.injection_strategy = Some(strategy.to_string());
1108        self
1109    }
1110
1111    /// Sets the cluster ID.
1112    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
1113        self.cluster_id = Some(cluster_id.to_string());
1114        self
1115    }
1116
1117    // ========================================
1118    // PROVENANCE BUILDER METHODS (Phase 1.2)
1119    // ========================================
1120
1121    /// Sets the original document hash for provenance tracking.
1122    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
1123        self.original_document_hash = Some(hash.to_string());
1124        self
1125    }
1126
1127    /// Sets the causal reason for this anomaly.
1128    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
1129        self.causal_reason = Some(reason);
1130        self
1131    }
1132
1133    /// Sets the structured injection strategy.
1134    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
1135        // Also set the legacy string field for backward compatibility
1136        self.injection_strategy = Some(strategy.strategy_type().to_string());
1137        self.structured_strategy = Some(strategy);
1138        self
1139    }
1140
1141    /// Sets the parent anomaly ID (for anomaly derivation chains).
1142    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
1143        self.parent_anomaly_id = Some(parent_id.to_string());
1144        self
1145    }
1146
1147    /// Adds a child anomaly ID.
1148    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
1149        self.child_anomaly_ids.push(child_id.to_string());
1150        self
1151    }
1152
1153    /// Sets the scenario ID for multi-step scenario tracking.
1154    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
1155        self.scenario_id = Some(scenario_id.to_string());
1156        self
1157    }
1158
1159    /// Sets the generation run ID.
1160    pub fn with_run_id(mut self, run_id: &str) -> Self {
1161        self.run_id = Some(run_id.to_string());
1162        self
1163    }
1164
1165    /// Sets the generation seed for reproducibility.
1166    pub fn with_generation_seed(mut self, seed: u64) -> Self {
1167        self.generation_seed = Some(seed);
1168        self
1169    }
1170
1171    /// Sets multiple provenance fields at once for convenience.
1172    pub fn with_provenance(
1173        mut self,
1174        run_id: Option<&str>,
1175        seed: Option<u64>,
1176        causal_reason: Option<AnomalyCausalReason>,
1177    ) -> Self {
1178        if let Some(id) = run_id {
1179            self.run_id = Some(id.to_string());
1180        }
1181        self.generation_seed = seed;
1182        self.causal_reason = causal_reason;
1183        self
1184    }
1185
1186    /// Converts to a feature vector for ML.
1187    ///
1188    /// Returns a vector of 15 features:
1189    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1190    /// - 1 feature: Severity (normalized 0-1)
1191    /// - 1 feature: Confidence
1192    /// - 1 feature: Has monetary impact (0/1)
1193    /// - 1 feature: Monetary impact (log-scaled)
1194    /// - 1 feature: Is intentional (0/1)
1195    /// - 1 feature: Number of related entities
1196    /// - 1 feature: Is part of cluster (0/1)
1197    /// - 1 feature: Is part of scenario (0/1)
1198    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1199    pub fn to_features(&self) -> Vec<f64> {
1200        let mut features = Vec::new();
1201
1202        // Category one-hot encoding
1203        let categories = [
1204            "Fraud",
1205            "Error",
1206            "ProcessIssue",
1207            "Statistical",
1208            "Relational",
1209            "Custom",
1210        ];
1211        for cat in &categories {
1212            features.push(if self.anomaly_type.category() == *cat {
1213                1.0
1214            } else {
1215                0.0
1216            });
1217        }
1218
1219        // Severity (normalized)
1220        features.push(self.severity as f64 / 5.0);
1221
1222        // Confidence
1223        features.push(self.confidence);
1224
1225        // Has monetary impact
1226        features.push(if self.monetary_impact.is_some() {
1227            1.0
1228        } else {
1229            0.0
1230        });
1231
1232        // Monetary impact (log-scaled)
1233        if let Some(impact) = self.monetary_impact {
1234            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1235            features.push((impact_f64.abs() + 1.0).ln());
1236        } else {
1237            features.push(0.0);
1238        }
1239
1240        // Is intentional
1241        features.push(if self.anomaly_type.is_intentional() {
1242            1.0
1243        } else {
1244            0.0
1245        });
1246
1247        // Number of related entities
1248        features.push(self.related_entities.len() as f64);
1249
1250        // Is part of cluster
1251        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1252
1253        // Provenance features
1254        // Is part of scenario
1255        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1256
1257        // Has parent anomaly (indicates this is a derived anomaly)
1258        features.push(if self.parent_anomaly_id.is_some() {
1259            1.0
1260        } else {
1261            0.0
1262        });
1263
1264        features
1265    }
1266
1267    /// Returns the number of features in the feature vector.
1268    pub fn feature_count() -> usize {
1269        15 // 6 category + 9 other features
1270    }
1271
1272    /// Returns feature names for documentation/ML metadata.
1273    pub fn feature_names() -> Vec<&'static str> {
1274        vec![
1275            "category_fraud",
1276            "category_error",
1277            "category_process_issue",
1278            "category_statistical",
1279            "category_relational",
1280            "category_custom",
1281            "severity_normalized",
1282            "confidence",
1283            "has_monetary_impact",
1284            "monetary_impact_log",
1285            "is_intentional",
1286            "related_entity_count",
1287            "is_clustered",
1288            "is_scenario_part",
1289            "is_derived",
1290        ]
1291    }
1292}
1293
1294/// Summary of anomalies for reporting.
1295#[derive(Debug, Clone, Default, Serialize, Deserialize)]
1296pub struct AnomalySummary {
1297    /// Total anomaly count.
1298    pub total_count: usize,
1299    /// Count by category.
1300    pub by_category: HashMap<String, usize>,
1301    /// Count by specific type.
1302    pub by_type: HashMap<String, usize>,
1303    /// Count by severity.
1304    pub by_severity: HashMap<u8, usize>,
1305    /// Count by company.
1306    pub by_company: HashMap<String, usize>,
1307    /// Total monetary impact.
1308    pub total_monetary_impact: Decimal,
1309    /// Date range.
1310    pub date_range: Option<(NaiveDate, NaiveDate)>,
1311    /// Number of clusters.
1312    pub cluster_count: usize,
1313}
1314
1315impl AnomalySummary {
1316    /// Creates a summary from a list of anomalies.
1317    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1318        let mut summary = AnomalySummary {
1319            total_count: anomalies.len(),
1320            ..Default::default()
1321        };
1322
1323        let mut min_date: Option<NaiveDate> = None;
1324        let mut max_date: Option<NaiveDate> = None;
1325        let mut clusters = std::collections::HashSet::new();
1326
1327        for anomaly in anomalies {
1328            // By category
1329            *summary
1330                .by_category
1331                .entry(anomaly.anomaly_type.category().to_string())
1332                .or_insert(0) += 1;
1333
1334            // By type
1335            *summary
1336                .by_type
1337                .entry(anomaly.anomaly_type.type_name())
1338                .or_insert(0) += 1;
1339
1340            // By severity
1341            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1342
1343            // By company
1344            *summary
1345                .by_company
1346                .entry(anomaly.company_code.clone())
1347                .or_insert(0) += 1;
1348
1349            // Monetary impact
1350            if let Some(impact) = anomaly.monetary_impact {
1351                summary.total_monetary_impact += impact;
1352            }
1353
1354            // Date range
1355            match min_date {
1356                None => min_date = Some(anomaly.anomaly_date),
1357                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1358                _ => {}
1359            }
1360            match max_date {
1361                None => max_date = Some(anomaly.anomaly_date),
1362                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1363                _ => {}
1364            }
1365
1366            // Clusters
1367            if let Some(cluster_id) = &anomaly.cluster_id {
1368                clusters.insert(cluster_id.clone());
1369            }
1370        }
1371
1372        summary.date_range = min_date.zip(max_date);
1373        summary.cluster_count = clusters.len();
1374
1375        summary
1376    }
1377}
1378
1379// ============================================================================
1380// ENHANCED ANOMALY TAXONOMY (FR-003)
1381// ============================================================================
1382
1383/// High-level anomaly category for multi-class classification.
1384///
1385/// These categories provide a more granular classification than the base
1386/// AnomalyType enum, enabling better ML model training and audit reporting.
1387#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
1388pub enum AnomalyCategory {
1389    // Vendor-related anomalies
1390    /// Fictitious or shell vendor.
1391    FictitiousVendor,
1392    /// Kickback or collusion with vendor.
1393    VendorKickback,
1394    /// Related party vendor transactions.
1395    RelatedPartyVendor,
1396
1397    // Transaction-related anomalies
1398    /// Duplicate payment or invoice.
1399    DuplicatePayment,
1400    /// Unauthorized transaction.
1401    UnauthorizedTransaction,
1402    /// Structured transactions to avoid thresholds.
1403    StructuredTransaction,
1404
1405    // Pattern-based anomalies
1406    /// Circular flow of funds.
1407    CircularFlow,
1408    /// Behavioral anomaly (deviation from normal patterns).
1409    BehavioralAnomaly,
1410    /// Timing-based anomaly.
1411    TimingAnomaly,
1412
1413    // Journal entry anomalies
1414    /// Manual journal entry anomaly.
1415    JournalAnomaly,
1416    /// Manual override of controls.
1417    ManualOverride,
1418    /// Missing approval in chain.
1419    MissingApproval,
1420
1421    // Statistical anomalies
1422    /// Statistical outlier.
1423    StatisticalOutlier,
1424    /// Distribution anomaly (Benford, etc.).
1425    DistributionAnomaly,
1426
1427    // Custom category
1428    /// User-defined category.
1429    Custom(String),
1430}
1431
1432impl AnomalyCategory {
1433    /// Derives an AnomalyCategory from an AnomalyType.
1434    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1435        match anomaly_type {
1436            AnomalyType::Fraud(fraud_type) => match fraud_type {
1437                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1438                    AnomalyCategory::FictitiousVendor
1439                }
1440                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1441                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1442                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1443                    AnomalyCategory::StructuredTransaction
1444                }
1445                FraudType::SelfApproval
1446                | FraudType::UnauthorizedApproval
1447                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1448                FraudType::TimingAnomaly
1449                | FraudType::RoundDollarManipulation
1450                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1451                _ => AnomalyCategory::BehavioralAnomaly,
1452            },
1453            AnomalyType::Error(error_type) => match error_type {
1454                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1455                ErrorType::WrongPeriod
1456                | ErrorType::BackdatedEntry
1457                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1458                _ => AnomalyCategory::JournalAnomaly,
1459            },
1460            AnomalyType::ProcessIssue(process_type) => match process_type {
1461                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1462                    AnomalyCategory::MissingApproval
1463                }
1464                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1465                    AnomalyCategory::ManualOverride
1466                }
1467                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1468                    AnomalyCategory::TimingAnomaly
1469                }
1470                _ => AnomalyCategory::BehavioralAnomaly,
1471            },
1472            AnomalyType::Statistical(stat_type) => match stat_type {
1473                StatisticalAnomalyType::BenfordViolation
1474                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1475                _ => AnomalyCategory::StatisticalOutlier,
1476            },
1477            AnomalyType::Relational(rel_type) => match rel_type {
1478                RelationalAnomalyType::CircularTransaction
1479                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1480                _ => AnomalyCategory::BehavioralAnomaly,
1481            },
1482            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1483        }
1484    }
1485
1486    /// Returns the category name as a string.
1487    pub fn name(&self) -> &str {
1488        match self {
1489            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1490            AnomalyCategory::VendorKickback => "vendor_kickback",
1491            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1492            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1493            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1494            AnomalyCategory::StructuredTransaction => "structured_transaction",
1495            AnomalyCategory::CircularFlow => "circular_flow",
1496            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1497            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1498            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1499            AnomalyCategory::ManualOverride => "manual_override",
1500            AnomalyCategory::MissingApproval => "missing_approval",
1501            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1502            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1503            AnomalyCategory::Custom(s) => s.as_str(),
1504        }
1505    }
1506
1507    /// Returns the ordinal value for ML encoding.
1508    pub fn ordinal(&self) -> u8 {
1509        match self {
1510            AnomalyCategory::FictitiousVendor => 0,
1511            AnomalyCategory::VendorKickback => 1,
1512            AnomalyCategory::RelatedPartyVendor => 2,
1513            AnomalyCategory::DuplicatePayment => 3,
1514            AnomalyCategory::UnauthorizedTransaction => 4,
1515            AnomalyCategory::StructuredTransaction => 5,
1516            AnomalyCategory::CircularFlow => 6,
1517            AnomalyCategory::BehavioralAnomaly => 7,
1518            AnomalyCategory::TimingAnomaly => 8,
1519            AnomalyCategory::JournalAnomaly => 9,
1520            AnomalyCategory::ManualOverride => 10,
1521            AnomalyCategory::MissingApproval => 11,
1522            AnomalyCategory::StatisticalOutlier => 12,
1523            AnomalyCategory::DistributionAnomaly => 13,
1524            AnomalyCategory::Custom(_) => 14,
1525        }
1526    }
1527
1528    /// Returns the total number of categories (excluding Custom).
1529    pub fn category_count() -> usize {
1530        15 // 14 fixed categories + Custom
1531    }
1532}
1533
1534/// Type of contributing factor for anomaly confidence/severity calculation.
1535#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
1536pub enum FactorType {
1537    /// Amount deviation from expected value.
1538    AmountDeviation,
1539    /// Proximity to approval/reporting threshold.
1540    ThresholdProximity,
1541    /// Timing-related anomaly indicator.
1542    TimingAnomaly,
1543    /// Entity risk score contribution.
1544    EntityRisk,
1545    /// Pattern match confidence.
1546    PatternMatch,
1547    /// Frequency deviation from normal.
1548    FrequencyDeviation,
1549    /// Relationship-based anomaly indicator.
1550    RelationshipAnomaly,
1551    /// Control bypass indicator.
1552    ControlBypass,
1553    /// Benford's Law violation.
1554    BenfordViolation,
1555    /// Duplicate indicator.
1556    DuplicateIndicator,
1557    /// Approval chain issue.
1558    ApprovalChainIssue,
1559    /// Documentation gap.
1560    DocumentationGap,
1561    /// Custom factor type.
1562    Custom,
1563}
1564
1565impl FactorType {
1566    /// Returns the factor type name.
1567    pub fn name(&self) -> &'static str {
1568        match self {
1569            FactorType::AmountDeviation => "amount_deviation",
1570            FactorType::ThresholdProximity => "threshold_proximity",
1571            FactorType::TimingAnomaly => "timing_anomaly",
1572            FactorType::EntityRisk => "entity_risk",
1573            FactorType::PatternMatch => "pattern_match",
1574            FactorType::FrequencyDeviation => "frequency_deviation",
1575            FactorType::RelationshipAnomaly => "relationship_anomaly",
1576            FactorType::ControlBypass => "control_bypass",
1577            FactorType::BenfordViolation => "benford_violation",
1578            FactorType::DuplicateIndicator => "duplicate_indicator",
1579            FactorType::ApprovalChainIssue => "approval_chain_issue",
1580            FactorType::DocumentationGap => "documentation_gap",
1581            FactorType::Custom => "custom",
1582        }
1583    }
1584}
1585
1586/// Evidence supporting a contributing factor.
1587#[derive(Debug, Clone, Serialize, Deserialize)]
1588pub struct FactorEvidence {
1589    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
1590    pub source: String,
1591    /// Raw evidence data.
1592    pub data: HashMap<String, String>,
1593}
1594
1595/// A contributing factor to anomaly confidence/severity.
1596#[derive(Debug, Clone, Serialize, Deserialize)]
1597pub struct ContributingFactor {
1598    /// Type of factor.
1599    pub factor_type: FactorType,
1600    /// Observed value.
1601    pub value: f64,
1602    /// Threshold or expected value.
1603    pub threshold: f64,
1604    /// Direction of comparison (true = value > threshold is anomalous).
1605    pub direction_greater: bool,
1606    /// Weight of this factor in overall calculation (0.0 - 1.0).
1607    pub weight: f64,
1608    /// Human-readable description.
1609    pub description: String,
1610    /// Optional supporting evidence.
1611    pub evidence: Option<FactorEvidence>,
1612}
1613
1614impl ContributingFactor {
1615    /// Creates a new contributing factor.
1616    pub fn new(
1617        factor_type: FactorType,
1618        value: f64,
1619        threshold: f64,
1620        direction_greater: bool,
1621        weight: f64,
1622        description: &str,
1623    ) -> Self {
1624        Self {
1625            factor_type,
1626            value,
1627            threshold,
1628            direction_greater,
1629            weight,
1630            description: description.to_string(),
1631            evidence: None,
1632        }
1633    }
1634
1635    /// Adds evidence to the factor.
1636    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1637        self.evidence = Some(FactorEvidence {
1638            source: source.to_string(),
1639            data,
1640        });
1641        self
1642    }
1643
1644    /// Calculates the factor's contribution to anomaly score.
1645    pub fn contribution(&self) -> f64 {
1646        let deviation = if self.direction_greater {
1647            (self.value - self.threshold).max(0.0)
1648        } else {
1649            (self.threshold - self.value).max(0.0)
1650        };
1651
1652        // Normalize by threshold to get relative deviation
1653        let relative_deviation = if self.threshold.abs() > 0.001 {
1654            deviation / self.threshold.abs()
1655        } else {
1656            deviation
1657        };
1658
1659        // Apply weight and cap at 1.0
1660        (relative_deviation * self.weight).min(1.0)
1661    }
1662}
1663
1664/// Enhanced anomaly label with dynamic confidence and severity.
1665#[derive(Debug, Clone, Serialize, Deserialize)]
1666pub struct EnhancedAnomalyLabel {
1667    /// Base labeled anomaly (backward compatible).
1668    pub base: LabeledAnomaly,
1669    /// Enhanced category classification.
1670    pub category: AnomalyCategory,
1671    /// Dynamically calculated confidence (0.0 - 1.0).
1672    pub enhanced_confidence: f64,
1673    /// Contextually calculated severity (0.0 - 1.0).
1674    pub enhanced_severity: f64,
1675    /// Factors contributing to confidence/severity.
1676    pub contributing_factors: Vec<ContributingFactor>,
1677    /// Secondary categories (for multi-label classification).
1678    pub secondary_categories: Vec<AnomalyCategory>,
1679}
1680
1681impl EnhancedAnomalyLabel {
1682    /// Creates an enhanced label from a base labeled anomaly.
1683    pub fn from_base(base: LabeledAnomaly) -> Self {
1684        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1685        let enhanced_confidence = base.confidence;
1686        let enhanced_severity = base.severity as f64 / 5.0;
1687
1688        Self {
1689            base,
1690            category,
1691            enhanced_confidence,
1692            enhanced_severity,
1693            contributing_factors: Vec::new(),
1694            secondary_categories: Vec::new(),
1695        }
1696    }
1697
1698    /// Sets the enhanced confidence.
1699    pub fn with_confidence(mut self, confidence: f64) -> Self {
1700        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1701        self
1702    }
1703
1704    /// Sets the enhanced severity.
1705    pub fn with_severity(mut self, severity: f64) -> Self {
1706        self.enhanced_severity = severity.clamp(0.0, 1.0);
1707        self
1708    }
1709
1710    /// Adds a contributing factor.
1711    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1712        self.contributing_factors.push(factor);
1713        self
1714    }
1715
1716    /// Adds a secondary category.
1717    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1718        if !self.secondary_categories.contains(&category) && category != self.category {
1719            self.secondary_categories.push(category);
1720        }
1721        self
1722    }
1723
1724    /// Converts to an extended feature vector.
1725    ///
1726    /// Returns base features (15) + enhanced features (10) = 25 features.
1727    pub fn to_features(&self) -> Vec<f64> {
1728        let mut features = self.base.to_features();
1729
1730        // Enhanced features
1731        features.push(self.enhanced_confidence);
1732        features.push(self.enhanced_severity);
1733        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1734        features.push(self.secondary_categories.len() as f64);
1735        features.push(self.contributing_factors.len() as f64);
1736
1737        // Max factor weight
1738        let max_weight = self
1739            .contributing_factors
1740            .iter()
1741            .map(|f| f.weight)
1742            .fold(0.0, f64::max);
1743        features.push(max_weight);
1744
1745        // Factor type indicators (binary flags for key factor types)
1746        let has_control_bypass = self
1747            .contributing_factors
1748            .iter()
1749            .any(|f| f.factor_type == FactorType::ControlBypass);
1750        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1751
1752        let has_amount_deviation = self
1753            .contributing_factors
1754            .iter()
1755            .any(|f| f.factor_type == FactorType::AmountDeviation);
1756        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1757
1758        let has_timing = self
1759            .contributing_factors
1760            .iter()
1761            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1762        features.push(if has_timing { 1.0 } else { 0.0 });
1763
1764        let has_pattern_match = self
1765            .contributing_factors
1766            .iter()
1767            .any(|f| f.factor_type == FactorType::PatternMatch);
1768        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1769
1770        features
1771    }
1772
1773    /// Returns the number of features in the enhanced feature vector.
1774    pub fn feature_count() -> usize {
1775        25 // 15 base + 10 enhanced
1776    }
1777
1778    /// Returns feature names for the enhanced feature vector.
1779    pub fn feature_names() -> Vec<&'static str> {
1780        let mut names = LabeledAnomaly::feature_names();
1781        names.extend(vec![
1782            "enhanced_confidence",
1783            "enhanced_severity",
1784            "category_ordinal",
1785            "secondary_category_count",
1786            "contributing_factor_count",
1787            "max_factor_weight",
1788            "has_control_bypass",
1789            "has_amount_deviation",
1790            "has_timing_factor",
1791            "has_pattern_match",
1792        ]);
1793        names
1794    }
1795}
1796
1797// ============================================================================
1798// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1799// ============================================================================
1800
1801/// Severity level classification for anomalies.
1802#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1803pub enum SeverityLevel {
1804    /// Minor issue, low impact.
1805    Low,
1806    /// Moderate issue, noticeable impact.
1807    #[default]
1808    Medium,
1809    /// Significant issue, substantial impact.
1810    High,
1811    /// Critical issue, severe impact requiring immediate attention.
1812    Critical,
1813}
1814
1815impl SeverityLevel {
1816    /// Returns the numeric value (1-4) for the severity level.
1817    pub fn numeric(&self) -> u8 {
1818        match self {
1819            SeverityLevel::Low => 1,
1820            SeverityLevel::Medium => 2,
1821            SeverityLevel::High => 3,
1822            SeverityLevel::Critical => 4,
1823        }
1824    }
1825
1826    /// Creates a severity level from a numeric value.
1827    pub fn from_numeric(value: u8) -> Self {
1828        match value {
1829            1 => SeverityLevel::Low,
1830            2 => SeverityLevel::Medium,
1831            3 => SeverityLevel::High,
1832            _ => SeverityLevel::Critical,
1833        }
1834    }
1835
1836    /// Creates a severity level from a normalized score (0.0-1.0).
1837    pub fn from_score(score: f64) -> Self {
1838        match score {
1839            s if s < 0.25 => SeverityLevel::Low,
1840            s if s < 0.50 => SeverityLevel::Medium,
1841            s if s < 0.75 => SeverityLevel::High,
1842            _ => SeverityLevel::Critical,
1843        }
1844    }
1845
1846    /// Returns a normalized score (0.0-1.0) for this severity level.
1847    pub fn to_score(&self) -> f64 {
1848        match self {
1849            SeverityLevel::Low => 0.125,
1850            SeverityLevel::Medium => 0.375,
1851            SeverityLevel::High => 0.625,
1852            SeverityLevel::Critical => 0.875,
1853        }
1854    }
1855}
1856
1857/// Structured severity scoring for anomalies.
1858#[derive(Debug, Clone, Serialize, Deserialize)]
1859pub struct AnomalySeverity {
1860    /// Severity level classification.
1861    pub level: SeverityLevel,
1862    /// Continuous severity score (0.0-1.0).
1863    pub score: f64,
1864    /// Absolute financial impact amount.
1865    pub financial_impact: Decimal,
1866    /// Whether this exceeds materiality threshold.
1867    pub is_material: bool,
1868    /// Materiality threshold used for determination.
1869    #[serde(default, skip_serializing_if = "Option::is_none")]
1870    pub materiality_threshold: Option<Decimal>,
1871}
1872
1873impl AnomalySeverity {
1874    /// Creates a new severity assessment.
1875    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1876        Self {
1877            level,
1878            score: level.to_score(),
1879            financial_impact,
1880            is_material: false,
1881            materiality_threshold: None,
1882        }
1883    }
1884
1885    /// Creates severity from a score, auto-determining level.
1886    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1887        Self {
1888            level: SeverityLevel::from_score(score),
1889            score: score.clamp(0.0, 1.0),
1890            financial_impact,
1891            is_material: false,
1892            materiality_threshold: None,
1893        }
1894    }
1895
1896    /// Sets the materiality assessment.
1897    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1898        self.materiality_threshold = Some(threshold);
1899        self.is_material = self.financial_impact.abs() >= threshold;
1900        self
1901    }
1902}
1903
1904impl Default for AnomalySeverity {
1905    fn default() -> Self {
1906        Self {
1907            level: SeverityLevel::Medium,
1908            score: 0.5,
1909            financial_impact: Decimal::ZERO,
1910            is_material: false,
1911            materiality_threshold: None,
1912        }
1913    }
1914}
1915
1916/// Detection difficulty classification for anomalies.
1917///
1918/// Categorizes how difficult an anomaly is to detect, which is useful
1919/// for ML model benchmarking and audit procedure selection.
1920///
1921/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
1922/// is used for drift event classification and has different variants.
1923#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1924pub enum AnomalyDetectionDifficulty {
1925    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
1926    Trivial,
1927    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
1928    Easy,
1929    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
1930    #[default]
1931    Moderate,
1932    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
1933    Hard,
1934    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
1935    Expert,
1936}
1937
1938impl AnomalyDetectionDifficulty {
1939    /// Returns the expected detection rate for this difficulty level.
1940    pub fn expected_detection_rate(&self) -> f64 {
1941        match self {
1942            AnomalyDetectionDifficulty::Trivial => 0.99,
1943            AnomalyDetectionDifficulty::Easy => 0.90,
1944            AnomalyDetectionDifficulty::Moderate => 0.70,
1945            AnomalyDetectionDifficulty::Hard => 0.40,
1946            AnomalyDetectionDifficulty::Expert => 0.15,
1947        }
1948    }
1949
1950    /// Returns a numeric difficulty score (0.0-1.0).
1951    pub fn difficulty_score(&self) -> f64 {
1952        match self {
1953            AnomalyDetectionDifficulty::Trivial => 0.05,
1954            AnomalyDetectionDifficulty::Easy => 0.25,
1955            AnomalyDetectionDifficulty::Moderate => 0.50,
1956            AnomalyDetectionDifficulty::Hard => 0.75,
1957            AnomalyDetectionDifficulty::Expert => 0.95,
1958        }
1959    }
1960
1961    /// Creates a difficulty level from a score (0.0-1.0).
1962    pub fn from_score(score: f64) -> Self {
1963        match score {
1964            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1965            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1966            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1967            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1968            _ => AnomalyDetectionDifficulty::Expert,
1969        }
1970    }
1971
1972    /// Returns the name of this difficulty level.
1973    pub fn name(&self) -> &'static str {
1974        match self {
1975            AnomalyDetectionDifficulty::Trivial => "trivial",
1976            AnomalyDetectionDifficulty::Easy => "easy",
1977            AnomalyDetectionDifficulty::Moderate => "moderate",
1978            AnomalyDetectionDifficulty::Hard => "hard",
1979            AnomalyDetectionDifficulty::Expert => "expert",
1980        }
1981    }
1982}
1983
1984/// Ground truth certainty level for anomaly labels.
1985///
1986/// Indicates how certain we are that the label is correct.
1987#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
1988pub enum GroundTruthCertainty {
1989    /// Definitively known (injected anomaly with full provenance).
1990    #[default]
1991    Definite,
1992    /// Highly probable based on strong evidence.
1993    Probable,
1994    /// Possibly an anomaly based on indirect evidence.
1995    Possible,
1996}
1997
1998impl GroundTruthCertainty {
1999    /// Returns a certainty score (0.0-1.0).
2000    pub fn certainty_score(&self) -> f64 {
2001        match self {
2002            GroundTruthCertainty::Definite => 1.0,
2003            GroundTruthCertainty::Probable => 0.8,
2004            GroundTruthCertainty::Possible => 0.5,
2005        }
2006    }
2007
2008    /// Returns the name of this certainty level.
2009    pub fn name(&self) -> &'static str {
2010        match self {
2011            GroundTruthCertainty::Definite => "definite",
2012            GroundTruthCertainty::Probable => "probable",
2013            GroundTruthCertainty::Possible => "possible",
2014        }
2015    }
2016}
2017
2018/// Detection method classification.
2019///
2020/// Indicates which detection methods are recommended or effective for an anomaly.
2021#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2022pub enum DetectionMethod {
2023    /// Simple rule-based detection (thresholds, filters).
2024    RuleBased,
2025    /// Statistical analysis (distributions, outlier detection).
2026    Statistical,
2027    /// Machine learning models (classification, anomaly detection).
2028    MachineLearning,
2029    /// Graph-based analysis (network patterns, relationships).
2030    GraphBased,
2031    /// Manual forensic audit procedures.
2032    ForensicAudit,
2033    /// Combination of multiple methods.
2034    Hybrid,
2035}
2036
2037impl DetectionMethod {
2038    /// Returns the name of this detection method.
2039    pub fn name(&self) -> &'static str {
2040        match self {
2041            DetectionMethod::RuleBased => "rule_based",
2042            DetectionMethod::Statistical => "statistical",
2043            DetectionMethod::MachineLearning => "machine_learning",
2044            DetectionMethod::GraphBased => "graph_based",
2045            DetectionMethod::ForensicAudit => "forensic_audit",
2046            DetectionMethod::Hybrid => "hybrid",
2047        }
2048    }
2049
2050    /// Returns a description of this detection method.
2051    pub fn description(&self) -> &'static str {
2052        match self {
2053            DetectionMethod::RuleBased => "Simple threshold and filter rules",
2054            DetectionMethod::Statistical => "Statistical distribution analysis",
2055            DetectionMethod::MachineLearning => "ML classification models",
2056            DetectionMethod::GraphBased => "Network and relationship analysis",
2057            DetectionMethod::ForensicAudit => "Manual forensic procedures",
2058            DetectionMethod::Hybrid => "Combined multi-method approach",
2059        }
2060    }
2061}
2062
2063/// Extended anomaly label with comprehensive multi-dimensional classification.
2064///
2065/// This extends the base `EnhancedAnomalyLabel` with additional fields for
2066/// severity scoring, detection difficulty, recommended methods, and ground truth.
2067#[derive(Debug, Clone, Serialize, Deserialize)]
2068pub struct ExtendedAnomalyLabel {
2069    /// Base labeled anomaly.
2070    pub base: LabeledAnomaly,
2071    /// Enhanced category classification.
2072    pub category: AnomalyCategory,
2073    /// Structured severity assessment.
2074    pub severity: AnomalySeverity,
2075    /// Detection difficulty classification.
2076    pub detection_difficulty: AnomalyDetectionDifficulty,
2077    /// Recommended detection methods for this anomaly.
2078    pub recommended_methods: Vec<DetectionMethod>,
2079    /// Key indicators that should trigger detection.
2080    pub key_indicators: Vec<String>,
2081    /// Ground truth certainty level.
2082    pub ground_truth_certainty: GroundTruthCertainty,
2083    /// Contributing factors to confidence/severity.
2084    pub contributing_factors: Vec<ContributingFactor>,
2085    /// Related entity IDs (vendors, customers, employees, etc.).
2086    pub related_entity_ids: Vec<String>,
2087    /// Secondary categories for multi-label classification.
2088    pub secondary_categories: Vec<AnomalyCategory>,
2089    /// Scheme ID if part of a multi-stage fraud scheme.
2090    #[serde(default, skip_serializing_if = "Option::is_none")]
2091    pub scheme_id: Option<String>,
2092    /// Stage number within a scheme (1-indexed).
2093    #[serde(default, skip_serializing_if = "Option::is_none")]
2094    pub scheme_stage: Option<u32>,
2095    /// Whether this is a near-miss (suspicious but legitimate).
2096    #[serde(default)]
2097    pub is_near_miss: bool,
2098    /// Explanation if this is a near-miss.
2099    #[serde(default, skip_serializing_if = "Option::is_none")]
2100    pub near_miss_explanation: Option<String>,
2101}
2102
2103impl ExtendedAnomalyLabel {
2104    /// Creates an extended label from a base labeled anomaly.
2105    pub fn from_base(base: LabeledAnomaly) -> Self {
2106        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
2107        let severity = AnomalySeverity {
2108            level: SeverityLevel::from_numeric(base.severity),
2109            score: base.severity as f64 / 5.0,
2110            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
2111            is_material: false,
2112            materiality_threshold: None,
2113        };
2114
2115        Self {
2116            base,
2117            category,
2118            severity,
2119            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
2120            recommended_methods: vec![DetectionMethod::RuleBased],
2121            key_indicators: Vec::new(),
2122            ground_truth_certainty: GroundTruthCertainty::Definite,
2123            contributing_factors: Vec::new(),
2124            related_entity_ids: Vec::new(),
2125            secondary_categories: Vec::new(),
2126            scheme_id: None,
2127            scheme_stage: None,
2128            is_near_miss: false,
2129            near_miss_explanation: None,
2130        }
2131    }
2132
2133    /// Sets the severity assessment.
2134    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
2135        self.severity = severity;
2136        self
2137    }
2138
2139    /// Sets the detection difficulty.
2140    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
2141        self.detection_difficulty = difficulty;
2142        self
2143    }
2144
2145    /// Adds a recommended detection method.
2146    pub fn with_method(mut self, method: DetectionMethod) -> Self {
2147        if !self.recommended_methods.contains(&method) {
2148            self.recommended_methods.push(method);
2149        }
2150        self
2151    }
2152
2153    /// Sets the recommended detection methods.
2154    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
2155        self.recommended_methods = methods;
2156        self
2157    }
2158
2159    /// Adds a key indicator.
2160    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
2161        self.key_indicators.push(indicator.into());
2162        self
2163    }
2164
2165    /// Sets the ground truth certainty.
2166    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
2167        self.ground_truth_certainty = certainty;
2168        self
2169    }
2170
2171    /// Adds a contributing factor.
2172    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
2173        self.contributing_factors.push(factor);
2174        self
2175    }
2176
2177    /// Adds a related entity ID.
2178    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
2179        self.related_entity_ids.push(entity_id.into());
2180        self
2181    }
2182
2183    /// Adds a secondary category.
2184    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2185        if category != self.category && !self.secondary_categories.contains(&category) {
2186            self.secondary_categories.push(category);
2187        }
2188        self
2189    }
2190
2191    /// Sets scheme information.
2192    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2193        self.scheme_id = Some(scheme_id.into());
2194        self.scheme_stage = Some(stage);
2195        self
2196    }
2197
2198    /// Marks this as a near-miss with explanation.
2199    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2200        self.is_near_miss = true;
2201        self.near_miss_explanation = Some(explanation.into());
2202        self
2203    }
2204
2205    /// Converts to an extended feature vector for ML.
2206    ///
2207    /// Returns base features (15) + extended features (15) = 30 features.
2208    pub fn to_features(&self) -> Vec<f64> {
2209        let mut features = self.base.to_features();
2210
2211        // Extended features
2212        features.push(self.severity.score);
2213        features.push(self.severity.level.to_score());
2214        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2215        features.push(self.detection_difficulty.difficulty_score());
2216        features.push(self.detection_difficulty.expected_detection_rate());
2217        features.push(self.ground_truth_certainty.certainty_score());
2218        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2219        features.push(self.secondary_categories.len() as f64);
2220        features.push(self.contributing_factors.len() as f64);
2221        features.push(self.key_indicators.len() as f64);
2222        features.push(self.recommended_methods.len() as f64);
2223        features.push(self.related_entity_ids.len() as f64);
2224        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2225        features.push(self.scheme_stage.unwrap_or(0) as f64);
2226        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2227
2228        features
2229    }
2230
2231    /// Returns the number of features in the extended feature vector.
2232    pub fn feature_count() -> usize {
2233        30 // 15 base + 15 extended
2234    }
2235
2236    /// Returns feature names for the extended feature vector.
2237    pub fn feature_names() -> Vec<&'static str> {
2238        let mut names = LabeledAnomaly::feature_names();
2239        names.extend(vec![
2240            "severity_score",
2241            "severity_level_score",
2242            "is_material",
2243            "difficulty_score",
2244            "expected_detection_rate",
2245            "ground_truth_certainty",
2246            "category_ordinal",
2247            "secondary_category_count",
2248            "contributing_factor_count",
2249            "key_indicator_count",
2250            "recommended_method_count",
2251            "related_entity_count",
2252            "is_part_of_scheme",
2253            "scheme_stage",
2254            "is_near_miss",
2255        ]);
2256        names
2257    }
2258}
2259
2260// ============================================================================
2261// MULTI-STAGE FRAUD SCHEME TYPES
2262// ============================================================================
2263
2264/// Type of multi-stage fraud scheme.
2265#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2266pub enum SchemeType {
2267    /// Gradual embezzlement over time.
2268    GradualEmbezzlement,
2269    /// Revenue manipulation across periods.
2270    RevenueManipulation,
2271    /// Vendor kickback scheme.
2272    VendorKickback,
2273    /// Round-tripping funds through multiple entities.
2274    RoundTripping,
2275    /// Ghost employee scheme.
2276    GhostEmployee,
2277    /// Expense reimbursement fraud.
2278    ExpenseReimbursement,
2279    /// Inventory theft scheme.
2280    InventoryTheft,
2281    /// Custom scheme type.
2282    Custom,
2283}
2284
2285impl SchemeType {
2286    /// Returns the name of this scheme type.
2287    pub fn name(&self) -> &'static str {
2288        match self {
2289            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2290            SchemeType::RevenueManipulation => "revenue_manipulation",
2291            SchemeType::VendorKickback => "vendor_kickback",
2292            SchemeType::RoundTripping => "round_tripping",
2293            SchemeType::GhostEmployee => "ghost_employee",
2294            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2295            SchemeType::InventoryTheft => "inventory_theft",
2296            SchemeType::Custom => "custom",
2297        }
2298    }
2299
2300    /// Returns the typical number of stages for this scheme type.
2301    pub fn typical_stages(&self) -> u32 {
2302        match self {
2303            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2304            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2305            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2306            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2307            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2308            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2309            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2310            SchemeType::Custom => 4,
2311        }
2312    }
2313}
2314
2315/// Status of detection for a fraud scheme.
2316#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
2317pub enum SchemeDetectionStatus {
2318    /// Scheme is undetected.
2319    #[default]
2320    Undetected,
2321    /// Under investigation but not confirmed.
2322    UnderInvestigation,
2323    /// Partially detected (some transactions flagged).
2324    PartiallyDetected,
2325    /// Fully detected and confirmed.
2326    FullyDetected,
2327}
2328
2329/// Reference to a transaction within a scheme.
2330#[derive(Debug, Clone, Serialize, Deserialize)]
2331pub struct SchemeTransactionRef {
2332    /// Document ID of the transaction.
2333    pub document_id: String,
2334    /// Transaction date.
2335    pub date: chrono::NaiveDate,
2336    /// Transaction amount.
2337    pub amount: Decimal,
2338    /// Stage this transaction belongs to.
2339    pub stage: u32,
2340    /// Anomaly ID if labeled.
2341    #[serde(default, skip_serializing_if = "Option::is_none")]
2342    pub anomaly_id: Option<String>,
2343}
2344
2345/// Concealment technique used in fraud.
2346#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2347pub enum ConcealmentTechnique {
2348    /// Document manipulation or forgery.
2349    DocumentManipulation,
2350    /// Circumventing approval processes.
2351    ApprovalCircumvention,
2352    /// Exploiting timing (period-end, holidays).
2353    TimingExploitation,
2354    /// Transaction splitting to avoid thresholds.
2355    TransactionSplitting,
2356    /// Account misclassification.
2357    AccountMisclassification,
2358    /// Collusion with other employees.
2359    Collusion,
2360    /// Data alteration or deletion.
2361    DataAlteration,
2362    /// Creating false documentation.
2363    FalseDocumentation,
2364}
2365
2366impl ConcealmentTechnique {
2367    /// Returns the difficulty bonus this technique adds.
2368    pub fn difficulty_bonus(&self) -> f64 {
2369        match self {
2370            ConcealmentTechnique::DocumentManipulation => 0.20,
2371            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2372            ConcealmentTechnique::TimingExploitation => 0.10,
2373            ConcealmentTechnique::TransactionSplitting => 0.15,
2374            ConcealmentTechnique::AccountMisclassification => 0.10,
2375            ConcealmentTechnique::Collusion => 0.25,
2376            ConcealmentTechnique::DataAlteration => 0.20,
2377            ConcealmentTechnique::FalseDocumentation => 0.15,
2378        }
2379    }
2380}
2381
2382// ============================================================================
2383// ACFE-ALIGNED FRAUD TAXONOMY
2384// ============================================================================
2385//
2386// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2387// Nations: Occupational Fraud Classification System. This taxonomy provides
2388// ACFE-aligned categories, schemes, and calibration data.
2389
/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
///
/// These are the three top-level branches of the tree. The figures quoted
/// below are the same constants returned by `typical_occurrence_rate()` and
/// `typical_median_loss()` on this type.
///
/// ACFE Report to the Nations statistics (typical):
/// - Asset Misappropriation: 86% of cases, $100k median loss
/// - Corruption: 33% of cases, $150k median loss
/// - Financial Statement Fraud: 10% of cases, $954k median loss
///
/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AcfeFraudCategory {
    /// Theft of organizational assets (cash, inventory, equipment).
    /// Most common (86% of cases) but typically lowest median loss ($100k).
    ///
    /// This is the variant produced by `AcfeFraudCategory::default()`.
    #[default]
    AssetMisappropriation,
    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
    /// Medium frequency (33% of cases), medium median loss ($150k).
    Corruption,
    /// Intentional misstatement of financial statements.
    /// Least common (10% of cases) but highest median loss ($954k).
    FinancialStatementFraud,
}
2411
2412impl AcfeFraudCategory {
2413    /// Returns the name of this category.
2414    pub fn name(&self) -> &'static str {
2415        match self {
2416            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2417            AcfeFraudCategory::Corruption => "corruption",
2418            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2419        }
2420    }
2421
2422    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2423    pub fn typical_occurrence_rate(&self) -> f64 {
2424        match self {
2425            AcfeFraudCategory::AssetMisappropriation => 0.86,
2426            AcfeFraudCategory::Corruption => 0.33,
2427            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2428        }
2429    }
2430
2431    /// Returns the typical median loss amount (from ACFE reports).
2432    pub fn typical_median_loss(&self) -> Decimal {
2433        match self {
2434            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2435            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2436            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2437        }
2438    }
2439
2440    /// Returns the typical detection time in months (from ACFE reports).
2441    pub fn typical_detection_months(&self) -> u32 {
2442        match self {
2443            AcfeFraudCategory::AssetMisappropriation => 12,
2444            AcfeFraudCategory::Corruption => 18,
2445            AcfeFraudCategory::FinancialStatementFraud => 24,
2446        }
2447    }
2448}
2449
/// Cash-based fraud schemes under Asset Misappropriation.
///
/// Organized according to the ACFE Fraud Tree:
/// - Theft of Cash on Hand
/// - Theft of Cash Receipts
/// - Fraudulent Disbursements
///
/// The section comments inside the enum mirror the grouping that
/// `subcategory()` reports for each variant; Fraudulent Disbursements is
/// further split into billing, payroll, expense reimbursement, check/payment
/// tampering and register/POS schemes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CashFraudScheme {
    // ========== Theft of Cash on Hand ==========
    /// Stealing cash from cash drawers or safes after it has been recorded.
    Larceny,
    /// Stealing cash before it is recorded in the books (intercepts receipts).
    // NOTE(review): the description refers to intercepted receipts, yet this
    // variant is grouped with cash on hand (and `subcategory()` reports
    // "theft_of_cash_on_hand" for it) - confirm the grouping is intended.
    Skimming,

    // ========== Theft of Cash Receipts ==========
    /// Skimming from sales transactions before recording.
    SalesSkimming,
    /// Intercepting customer payments on accounts receivable.
    ReceivablesSkimming,
    /// Creating false refunds to pocket the difference.
    RefundSchemes,

    // ========== Fraudulent Disbursements - Billing Schemes ==========
    /// Creating fictitious vendors to invoice and pay.
    ShellCompany,
    /// Manipulating payments to legitimate vendors for personal gain.
    NonAccompliceVendor,
    /// Using company funds for personal purchases.
    PersonalPurchases,

    // ========== Fraudulent Disbursements - Payroll Schemes ==========
    /// Creating fake employees to collect wages.
    GhostEmployee,
    /// Falsifying hours worked, sales commissions, or salary rates.
    FalsifiedWages,
    /// Manipulating commission calculations.
    CommissionSchemes,

    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
    /// Claiming non-business expenses as business expenses.
    MischaracterizedExpenses,
    /// Inflating legitimate expense amounts.
    OverstatedExpenses,
    /// Creating completely fictitious expenses.
    FictitiousExpenses,

    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
    /// Forging the signature of an authorized check signer.
    ForgedMaker,
    /// Intercepting and altering the endorsement on legitimate checks.
    ForgedEndorsement,
    /// Altering the payee on a legitimate check.
    AlteredPayee,
    /// Authorized signer writing checks for personal benefit.
    AuthorizedMaker,

    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
    /// Creating false voided transactions.
    FalseVoids,
    /// Processing fictitious refunds.
    FalseRefunds,
}
2512
2513impl CashFraudScheme {
2514    /// Returns the ACFE category this scheme belongs to.
2515    pub fn category(&self) -> AcfeFraudCategory {
2516        AcfeFraudCategory::AssetMisappropriation
2517    }
2518
2519    /// Returns the subcategory within the ACFE Fraud Tree.
2520    pub fn subcategory(&self) -> &'static str {
2521        match self {
2522            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2523            CashFraudScheme::SalesSkimming
2524            | CashFraudScheme::ReceivablesSkimming
2525            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2526            CashFraudScheme::ShellCompany
2527            | CashFraudScheme::NonAccompliceVendor
2528            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2529            CashFraudScheme::GhostEmployee
2530            | CashFraudScheme::FalsifiedWages
2531            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2532            CashFraudScheme::MischaracterizedExpenses
2533            | CashFraudScheme::OverstatedExpenses
2534            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2535            CashFraudScheme::ForgedMaker
2536            | CashFraudScheme::ForgedEndorsement
2537            | CashFraudScheme::AlteredPayee
2538            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2539            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2540        }
2541    }
2542
2543    /// Returns the typical severity (1-5) for this scheme.
2544    pub fn severity(&self) -> u8 {
2545        match self {
2546            // Lower severity - often small amounts, easier to detect
2547            CashFraudScheme::FalseVoids
2548            | CashFraudScheme::FalseRefunds
2549            | CashFraudScheme::MischaracterizedExpenses => 3,
2550            // Medium severity
2551            CashFraudScheme::OverstatedExpenses
2552            | CashFraudScheme::Skimming
2553            | CashFraudScheme::Larceny
2554            | CashFraudScheme::PersonalPurchases
2555            | CashFraudScheme::FalsifiedWages => 4,
2556            // Higher severity - larger amounts, harder to detect
2557            CashFraudScheme::ShellCompany
2558            | CashFraudScheme::GhostEmployee
2559            | CashFraudScheme::FictitiousExpenses
2560            | CashFraudScheme::ForgedMaker
2561            | CashFraudScheme::AuthorizedMaker => 5,
2562            _ => 4,
2563        }
2564    }
2565
2566    /// Returns the typical detection difficulty.
2567    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2568        match self {
2569            // Easy to detect with basic controls
2570            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2571                AnomalyDetectionDifficulty::Easy
2572            }
2573            // Moderate - requires reconciliation
2574            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2575                AnomalyDetectionDifficulty::Moderate
2576            }
2577            // Hard - requires sophisticated analysis
2578            CashFraudScheme::Skimming
2579            | CashFraudScheme::ShellCompany
2580            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2581            // Expert level
2582            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2583                AnomalyDetectionDifficulty::Expert
2584            }
2585            _ => AnomalyDetectionDifficulty::Moderate,
2586        }
2587    }
2588
2589    /// Returns all variants for iteration.
2590    pub fn all_variants() -> &'static [CashFraudScheme] {
2591        &[
2592            CashFraudScheme::Larceny,
2593            CashFraudScheme::Skimming,
2594            CashFraudScheme::SalesSkimming,
2595            CashFraudScheme::ReceivablesSkimming,
2596            CashFraudScheme::RefundSchemes,
2597            CashFraudScheme::ShellCompany,
2598            CashFraudScheme::NonAccompliceVendor,
2599            CashFraudScheme::PersonalPurchases,
2600            CashFraudScheme::GhostEmployee,
2601            CashFraudScheme::FalsifiedWages,
2602            CashFraudScheme::CommissionSchemes,
2603            CashFraudScheme::MischaracterizedExpenses,
2604            CashFraudScheme::OverstatedExpenses,
2605            CashFraudScheme::FictitiousExpenses,
2606            CashFraudScheme::ForgedMaker,
2607            CashFraudScheme::ForgedEndorsement,
2608            CashFraudScheme::AlteredPayee,
2609            CashFraudScheme::AuthorizedMaker,
2610            CashFraudScheme::FalseVoids,
2611            CashFraudScheme::FalseRefunds,
2612        ]
2613    }
2614}
2615
/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
///
/// The two sections below correspond to the `"inventory"` and
/// `"other_assets"` labels returned by `subcategory()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AssetFraudScheme {
    // ========== Inventory Schemes ==========
    /// Misusing or converting inventory for personal benefit.
    InventoryMisuse,
    /// Stealing physical inventory items.
    InventoryTheft,
    /// Manipulating purchasing to facilitate theft.
    InventoryPurchasingScheme,
    /// Manipulating receiving/shipping to steal inventory.
    InventoryReceivingScheme,

    // ========== Other Asset Schemes ==========
    /// Misusing company equipment or vehicles.
    EquipmentMisuse,
    /// Theft of company equipment, tools, or supplies.
    EquipmentTheft,
    /// Unauthorized access to or theft of intellectual property.
    IntellectualPropertyTheft,
    /// Using company time/resources for personal business.
    TimeTheft,
}
2639
2640impl AssetFraudScheme {
2641    /// Returns the ACFE category this scheme belongs to.
2642    pub fn category(&self) -> AcfeFraudCategory {
2643        AcfeFraudCategory::AssetMisappropriation
2644    }
2645
2646    /// Returns the subcategory within the ACFE Fraud Tree.
2647    pub fn subcategory(&self) -> &'static str {
2648        match self {
2649            AssetFraudScheme::InventoryMisuse
2650            | AssetFraudScheme::InventoryTheft
2651            | AssetFraudScheme::InventoryPurchasingScheme
2652            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2653            _ => "other_assets",
2654        }
2655    }
2656
2657    /// Returns the typical severity (1-5) for this scheme.
2658    pub fn severity(&self) -> u8 {
2659        match self {
2660            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2661            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2662            AssetFraudScheme::InventoryTheft
2663            | AssetFraudScheme::InventoryPurchasingScheme
2664            | AssetFraudScheme::InventoryReceivingScheme => 4,
2665            AssetFraudScheme::IntellectualPropertyTheft => 5,
2666        }
2667    }
2668}
2669
/// Corruption schemes under the ACFE Fraud Tree.
///
/// Corruption schemes involve the wrongful use of influence in a business
/// transaction to procure personal benefit.
///
/// The four sections below match the labels returned by `subcategory()`:
/// `"conflicts_of_interest"`, `"bribery"`, `"illegal_gratuities"` and
/// `"economic_extortion"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CorruptionScheme {
    // ========== Conflicts of Interest ==========
    /// Employee has undisclosed financial interest in purchasing decisions.
    PurchasingConflict,
    /// Employee has undisclosed relationship with customer/vendor.
    SalesConflict,
    /// Employee owns or has interest in competing business.
    OutsideBusinessInterest,
    /// Employee makes decisions benefiting family members.
    NepotismConflict,

    // ========== Bribery ==========
    /// Kickback payments from vendors for favorable treatment.
    InvoiceKickback,
    /// Collusion among vendors to inflate prices.
    BidRigging,
    /// Other cash payments for favorable decisions.
    CashBribery,
    /// Bribery of government officials.
    PublicOfficial,

    // ========== Illegal Gratuities ==========
    /// Gifts given after favorable decisions (not agreed in advance).
    IllegalGratuity,

    // ========== Economic Extortion ==========
    /// Demanding payment under threat of adverse action.
    EconomicExtortion,
}
2704
2705impl CorruptionScheme {
2706    /// Returns the ACFE category this scheme belongs to.
2707    pub fn category(&self) -> AcfeFraudCategory {
2708        AcfeFraudCategory::Corruption
2709    }
2710
2711    /// Returns the subcategory within the ACFE Fraud Tree.
2712    pub fn subcategory(&self) -> &'static str {
2713        match self {
2714            CorruptionScheme::PurchasingConflict
2715            | CorruptionScheme::SalesConflict
2716            | CorruptionScheme::OutsideBusinessInterest
2717            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2718            CorruptionScheme::InvoiceKickback
2719            | CorruptionScheme::BidRigging
2720            | CorruptionScheme::CashBribery
2721            | CorruptionScheme::PublicOfficial => "bribery",
2722            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2723            CorruptionScheme::EconomicExtortion => "economic_extortion",
2724        }
2725    }
2726
2727    /// Returns the typical severity (1-5) for this scheme.
2728    pub fn severity(&self) -> u8 {
2729        match self {
2730            // Lower severity conflicts of interest
2731            CorruptionScheme::NepotismConflict => 3,
2732            // Medium severity
2733            CorruptionScheme::PurchasingConflict
2734            | CorruptionScheme::SalesConflict
2735            | CorruptionScheme::OutsideBusinessInterest
2736            | CorruptionScheme::IllegalGratuity => 4,
2737            // High severity - active corruption
2738            CorruptionScheme::InvoiceKickback
2739            | CorruptionScheme::BidRigging
2740            | CorruptionScheme::CashBribery
2741            | CorruptionScheme::EconomicExtortion => 5,
2742            // Highest severity - involves public officials
2743            CorruptionScheme::PublicOfficial => 5,
2744        }
2745    }
2746
2747    /// Returns the typical detection difficulty.
2748    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2749        match self {
2750            // Easier to detect with proper disclosure requirements
2751            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2752                AnomalyDetectionDifficulty::Moderate
2753            }
2754            // Hard - requires transaction pattern analysis
2755            CorruptionScheme::PurchasingConflict
2756            | CorruptionScheme::SalesConflict
2757            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2758            // Expert level - deliberate concealment
2759            CorruptionScheme::InvoiceKickback
2760            | CorruptionScheme::CashBribery
2761            | CorruptionScheme::PublicOfficial
2762            | CorruptionScheme::IllegalGratuity
2763            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2764        }
2765    }
2766
2767    /// Returns all variants for iteration.
2768    pub fn all_variants() -> &'static [CorruptionScheme] {
2769        &[
2770            CorruptionScheme::PurchasingConflict,
2771            CorruptionScheme::SalesConflict,
2772            CorruptionScheme::OutsideBusinessInterest,
2773            CorruptionScheme::NepotismConflict,
2774            CorruptionScheme::InvoiceKickback,
2775            CorruptionScheme::BidRigging,
2776            CorruptionScheme::CashBribery,
2777            CorruptionScheme::PublicOfficial,
2778            CorruptionScheme::IllegalGratuity,
2779            CorruptionScheme::EconomicExtortion,
2780        ]
2781    }
2782}
2783
/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
///
/// Financial statement fraud involves the intentional misstatement or omission
/// of material information in financial reports.
///
/// Variants in the first section report `"overstatement"` from
/// `subcategory()`; variants in the second section report `"understatement"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FinancialStatementScheme {
    // ========== Asset/Revenue Overstatement ==========
    /// Recording revenue before it is earned.
    PrematureRevenue,
    /// Deferring expenses to future periods.
    DelayedExpenses,
    /// Recording revenue for transactions that never occurred.
    FictitiousRevenues,
    /// Failing to record known liabilities.
    ConcealedLiabilities,
    /// Overstating the value of assets.
    ImproperAssetValuations,
    /// Omitting or misstating required disclosures.
    ImproperDisclosures,
    /// Manipulating timing of revenue recognition (channel stuffing).
    ChannelStuffing,
    /// Recognizing bill-and-hold revenue improperly.
    BillAndHold,
    /// Capitalizing expenses that should be expensed.
    ImproperCapitalization,

    // ========== Asset/Revenue Understatement ==========
    /// Understating revenue (often for tax purposes).
    UnderstatedRevenues,
    /// Recording excessive expenses.
    OverstatedExpenses,
    /// Recording excessive liabilities or reserves.
    OverstatedLiabilities,
    /// Undervaluing assets for writedowns/reserves.
    ImproperAssetWritedowns,
}
2820
2821impl FinancialStatementScheme {
2822    /// Returns the ACFE category this scheme belongs to.
2823    pub fn category(&self) -> AcfeFraudCategory {
2824        AcfeFraudCategory::FinancialStatementFraud
2825    }
2826
2827    /// Returns the subcategory within the ACFE Fraud Tree.
2828    pub fn subcategory(&self) -> &'static str {
2829        match self {
2830            FinancialStatementScheme::UnderstatedRevenues
2831            | FinancialStatementScheme::OverstatedExpenses
2832            | FinancialStatementScheme::OverstatedLiabilities
2833            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2834            _ => "overstatement",
2835        }
2836    }
2837
2838    /// Returns the typical severity (1-5) for this scheme.
2839    pub fn severity(&self) -> u8 {
2840        // All financial statement fraud is high severity
2841        5
2842    }
2843
2844    /// Returns the typical detection difficulty.
2845    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2846        match self {
2847            // Easier to detect with good analytics
2848            FinancialStatementScheme::ChannelStuffing
2849            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2850            // Hard - requires deep analysis
2851            FinancialStatementScheme::PrematureRevenue
2852            | FinancialStatementScheme::ImproperCapitalization
2853            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2854            // Expert level
2855            FinancialStatementScheme::FictitiousRevenues
2856            | FinancialStatementScheme::ConcealedLiabilities
2857            | FinancialStatementScheme::ImproperAssetValuations
2858            | FinancialStatementScheme::ImproperDisclosures
2859            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2860            _ => AnomalyDetectionDifficulty::Hard,
2861        }
2862    }
2863
2864    /// Returns all variants for iteration.
2865    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2866        &[
2867            FinancialStatementScheme::PrematureRevenue,
2868            FinancialStatementScheme::DelayedExpenses,
2869            FinancialStatementScheme::FictitiousRevenues,
2870            FinancialStatementScheme::ConcealedLiabilities,
2871            FinancialStatementScheme::ImproperAssetValuations,
2872            FinancialStatementScheme::ImproperDisclosures,
2873            FinancialStatementScheme::ChannelStuffing,
2874            FinancialStatementScheme::BillAndHold,
2875            FinancialStatementScheme::ImproperCapitalization,
2876            FinancialStatementScheme::UnderstatedRevenues,
2877            FinancialStatementScheme::OverstatedExpenses,
2878            FinancialStatementScheme::OverstatedLiabilities,
2879            FinancialStatementScheme::ImproperAssetWritedowns,
2880        ]
2881    }
2882}
2883
/// Unified ACFE scheme type that encompasses all fraud schemes.
///
/// Each variant wraps one of the scheme enums. `category()` and `severity()`
/// forward to the wrapped value; `detection_difficulty()` forwards for every
/// variant except `Asset`, which it reports as
/// `AnomalyDetectionDifficulty::Moderate`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeScheme {
    /// Cash-based fraud schemes.
    Cash(CashFraudScheme),
    /// Inventory and other asset fraud schemes.
    Asset(AssetFraudScheme),
    /// Corruption schemes.
    Corruption(CorruptionScheme),
    /// Financial statement fraud schemes.
    FinancialStatement(FinancialStatementScheme),
}
2896
2897impl AcfeScheme {
2898    /// Returns the ACFE category this scheme belongs to.
2899    pub fn category(&self) -> AcfeFraudCategory {
2900        match self {
2901            AcfeScheme::Cash(s) => s.category(),
2902            AcfeScheme::Asset(s) => s.category(),
2903            AcfeScheme::Corruption(s) => s.category(),
2904            AcfeScheme::FinancialStatement(s) => s.category(),
2905        }
2906    }
2907
2908    /// Returns the severity (1-5) for this scheme.
2909    pub fn severity(&self) -> u8 {
2910        match self {
2911            AcfeScheme::Cash(s) => s.severity(),
2912            AcfeScheme::Asset(s) => s.severity(),
2913            AcfeScheme::Corruption(s) => s.severity(),
2914            AcfeScheme::FinancialStatement(s) => s.severity(),
2915        }
2916    }
2917
2918    /// Returns the detection difficulty for this scheme.
2919    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2920        match self {
2921            AcfeScheme::Cash(s) => s.detection_difficulty(),
2922            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2923            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2924            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2925        }
2926    }
2927}
2928
/// How a fraud was detected (from ACFE statistics).
///
/// The share of cases attributed to each method is available from
/// `typical_detection_rate()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AcfeDetectionMethod {
    /// Tip from employee, customer, vendor, or anonymous source.
    Tip,
    /// Internal audit procedures.
    InternalAudit,
    /// Management review and oversight.
    ManagementReview,
    /// External audit procedures.
    ExternalAudit,
    /// Account reconciliation discrepancies.
    AccountReconciliation,
    /// Document examination.
    DocumentExamination,
    /// Discovered by accident.
    ByAccident,
    /// Automated monitoring/IT controls.
    ItControls,
    /// Surveillance or investigation.
    Surveillance,
    /// Confession by perpetrator.
    Confession,
    /// Law enforcement notification.
    LawEnforcement,
    /// Other detection method.
    Other,
}
2957
2958impl AcfeDetectionMethod {
2959    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2960    pub fn typical_detection_rate(&self) -> f64 {
2961        match self {
2962            AcfeDetectionMethod::Tip => 0.42,
2963            AcfeDetectionMethod::InternalAudit => 0.16,
2964            AcfeDetectionMethod::ManagementReview => 0.12,
2965            AcfeDetectionMethod::ExternalAudit => 0.04,
2966            AcfeDetectionMethod::AccountReconciliation => 0.05,
2967            AcfeDetectionMethod::DocumentExamination => 0.04,
2968            AcfeDetectionMethod::ByAccident => 0.06,
2969            AcfeDetectionMethod::ItControls => 0.03,
2970            AcfeDetectionMethod::Surveillance => 0.02,
2971            AcfeDetectionMethod::Confession => 0.02,
2972            AcfeDetectionMethod::LawEnforcement => 0.01,
2973            AcfeDetectionMethod::Other => 0.03,
2974        }
2975    }
2976
2977    /// Returns all variants for iteration.
2978    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2979        &[
2980            AcfeDetectionMethod::Tip,
2981            AcfeDetectionMethod::InternalAudit,
2982            AcfeDetectionMethod::ManagementReview,
2983            AcfeDetectionMethod::ExternalAudit,
2984            AcfeDetectionMethod::AccountReconciliation,
2985            AcfeDetectionMethod::DocumentExamination,
2986            AcfeDetectionMethod::ByAccident,
2987            AcfeDetectionMethod::ItControls,
2988            AcfeDetectionMethod::Surveillance,
2989            AcfeDetectionMethod::Confession,
2990            AcfeDetectionMethod::LawEnforcement,
2991            AcfeDetectionMethod::Other,
2992        ]
2993    }
2994}
2995
/// Department/position of perpetrator (from ACFE statistics).
///
/// Per-department case shares and median losses are available from
/// `typical_occurrence_rate()` and `typical_median_loss()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorDepartment {
    /// Accounting, finance, or bookkeeping.
    Accounting,
    /// Operations or manufacturing.
    Operations,
    /// Executive/upper management.
    Executive,
    /// Sales.
    Sales,
    /// Customer service.
    CustomerService,
    /// Purchasing/procurement.
    Purchasing,
    /// Information technology.
    It,
    /// Human resources.
    HumanResources,
    /// Administrative/clerical.
    Administrative,
    /// Warehouse/inventory.
    Warehouse,
    /// Board of directors.
    BoardOfDirectors,
    /// Other department.
    Other,
}
3024
3025impl PerpetratorDepartment {
3026    /// Returns the typical percentage of frauds by department (from ACFE reports).
3027    pub fn typical_occurrence_rate(&self) -> f64 {
3028        match self {
3029            PerpetratorDepartment::Accounting => 0.21,
3030            PerpetratorDepartment::Operations => 0.17,
3031            PerpetratorDepartment::Executive => 0.12,
3032            PerpetratorDepartment::Sales => 0.11,
3033            PerpetratorDepartment::CustomerService => 0.07,
3034            PerpetratorDepartment::Purchasing => 0.06,
3035            PerpetratorDepartment::It => 0.05,
3036            PerpetratorDepartment::HumanResources => 0.04,
3037            PerpetratorDepartment::Administrative => 0.04,
3038            PerpetratorDepartment::Warehouse => 0.03,
3039            PerpetratorDepartment::BoardOfDirectors => 0.02,
3040            PerpetratorDepartment::Other => 0.08,
3041        }
3042    }
3043
3044    /// Returns the typical median loss by perpetrator department.
3045    pub fn typical_median_loss(&self) -> Decimal {
3046        match self {
3047            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
3048            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
3049            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
3050            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
3051            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
3052            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
3053            PerpetratorDepartment::It => Decimal::new(100_000, 0),
3054            _ => Decimal::new(80_000, 0),
3055        }
3056    }
3057}
3058
/// Perpetrator position level (from ACFE statistics).
///
/// Per-level case shares and median losses are available from
/// `typical_occurrence_rate()` and `typical_median_loss()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum PerpetratorLevel {
    /// Entry-level employee.
    Employee,
    /// Manager or supervisor.
    Manager,
    /// Owner, executive, or C-level.
    OwnerExecutive,
}
3069
3070impl PerpetratorLevel {
3071    /// Returns the typical percentage of frauds by position level.
3072    pub fn typical_occurrence_rate(&self) -> f64 {
3073        match self {
3074            PerpetratorLevel::Employee => 0.42,
3075            PerpetratorLevel::Manager => 0.36,
3076            PerpetratorLevel::OwnerExecutive => 0.22,
3077        }
3078    }
3079
3080    /// Returns the typical median loss by position level.
3081    pub fn typical_median_loss(&self) -> Decimal {
3082        match self {
3083            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
3084            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
3085            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
3086        }
3087    }
3088}
3089
/// ACFE Calibration data for fraud generation.
///
/// Contains statistical parameters based on ACFE Report to the Nations
/// for realistic fraud pattern generation.
///
/// The `Default` implementation fills every field with the typical values
/// quoted on the individual fields; `validate()` checks basic sanity of the
/// scalar fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AcfeCalibration {
    /// Overall median loss for occupational fraud ($117,000 typical).
    pub median_loss: Decimal,
    /// Median duration in months before detection (12 months typical).
    pub median_duration_months: u32,
    /// Distribution of fraud by category.
    ///
    /// The default map is keyed by snake_case category name
    /// (e.g. `"asset_misappropriation"`) with fractional shares as values.
    pub category_distribution: HashMap<String, f64>,
    /// Distribution of detection methods.
    ///
    /// The default map is keyed by the lowercased `Debug` name of each
    /// `AcfeDetectionMethod` variant, so multi-word variants have no
    /// separator (e.g. `"internalaudit"`), unlike the snake_case keys of the
    /// other default maps.
    pub detection_method_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator department.
    ///
    /// The default map lists six departments by snake_case name and folds
    /// everything else into a single `"other"` entry.
    pub department_distribution: HashMap<String, f64>,
    /// Distribution by perpetrator level.
    ///
    /// The default map uses the keys `"employee"`, `"manager"` and
    /// `"owner_executive"`.
    pub level_distribution: HashMap<String, f64>,
    /// Average number of red flags per fraud case.
    pub avg_red_flags_per_case: f64,
    /// Percentage of frauds involving collusion.
    ///
    /// Stored as a fraction; `validate()` requires it to lie in `0.0..=1.0`.
    pub collusion_rate: f64,
}
3113
3114impl Default for AcfeCalibration {
3115    fn default() -> Self {
3116        let mut category_distribution = HashMap::new();
3117        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
3118        category_distribution.insert("corruption".to_string(), 0.33);
3119        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
3120
3121        let mut detection_method_distribution = HashMap::new();
3122        for method in AcfeDetectionMethod::all_variants() {
3123            detection_method_distribution.insert(
3124                format!("{method:?}").to_lowercase(),
3125                method.typical_detection_rate(),
3126            );
3127        }
3128
3129        let mut department_distribution = HashMap::new();
3130        department_distribution.insert("accounting".to_string(), 0.21);
3131        department_distribution.insert("operations".to_string(), 0.17);
3132        department_distribution.insert("executive".to_string(), 0.12);
3133        department_distribution.insert("sales".to_string(), 0.11);
3134        department_distribution.insert("customer_service".to_string(), 0.07);
3135        department_distribution.insert("purchasing".to_string(), 0.06);
3136        department_distribution.insert("other".to_string(), 0.26);
3137
3138        let mut level_distribution = HashMap::new();
3139        level_distribution.insert("employee".to_string(), 0.42);
3140        level_distribution.insert("manager".to_string(), 0.36);
3141        level_distribution.insert("owner_executive".to_string(), 0.22);
3142
3143        Self {
3144            median_loss: Decimal::new(117_000, 0),
3145            median_duration_months: 12,
3146            category_distribution,
3147            detection_method_distribution,
3148            department_distribution,
3149            level_distribution,
3150            avg_red_flags_per_case: 2.8,
3151            collusion_rate: 0.50,
3152        }
3153    }
3154}
3155
3156impl AcfeCalibration {
3157    /// Creates a new ACFE calibration with the given parameters.
3158    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
3159        Self {
3160            median_loss,
3161            median_duration_months,
3162            ..Self::default()
3163        }
3164    }
3165
3166    /// Returns the median loss for a specific category.
3167    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
3168        category.typical_median_loss()
3169    }
3170
3171    /// Returns the median duration for a specific category.
3172    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
3173        category.typical_detection_months()
3174    }
3175
3176    /// Validates the calibration data.
3177    pub fn validate(&self) -> Result<(), String> {
3178        if self.median_loss <= Decimal::ZERO {
3179            return Err("Median loss must be positive".to_string());
3180        }
3181        if self.median_duration_months == 0 {
3182            return Err("Median duration must be at least 1 month".to_string());
3183        }
3184        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3185            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3186        }
3187        Ok(())
3188    }
3189}
3190
3191/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
3192///
3193/// The fraud triangle is a model for explaining the factors that cause
3194/// someone to commit occupational fraud.
3195#[derive(Debug, Clone, Serialize, Deserialize)]
3196pub struct FraudTriangle {
3197    /// Pressure or incentive to commit fraud.
3198    pub pressure: PressureType,
3199    /// Opportunity factors that enable fraud.
3200    pub opportunities: Vec<OpportunityFactor>,
3201    /// Rationalization used to justify the fraud.
3202    pub rationalization: Rationalization,
3203}
3204
3205impl FraudTriangle {
3206    /// Creates a new fraud triangle.
3207    pub fn new(
3208        pressure: PressureType,
3209        opportunities: Vec<OpportunityFactor>,
3210        rationalization: Rationalization,
3211    ) -> Self {
3212        Self {
3213            pressure,
3214            opportunities,
3215            rationalization,
3216        }
3217    }
3218
3219    /// Returns a risk score based on the fraud triangle components.
3220    pub fn risk_score(&self) -> f64 {
3221        let pressure_score = self.pressure.risk_weight();
3222        let opportunity_score: f64 = self
3223            .opportunities
3224            .iter()
3225            .map(OpportunityFactor::risk_weight)
3226            .sum::<f64>()
3227            / self.opportunities.len().max(1) as f64;
3228        let rationalization_score = self.rationalization.risk_weight();
3229
3230        (pressure_score + opportunity_score + rationalization_score) / 3.0
3231    }
3232}
3233
3234/// Types of pressure/incentive that can lead to fraud.
3235#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3236pub enum PressureType {
3237    // Financial Pressures
3238    /// Personal financial difficulties (debt, lifestyle beyond means).
3239    PersonalFinancialDifficulties,
3240    /// Pressure to meet financial targets/earnings expectations.
3241    FinancialTargets,
3242    /// Market or analyst expectations.
3243    MarketExpectations,
3244    /// Debt covenant compliance requirements.
3245    CovenantCompliance,
3246    /// Credit rating maintenance.
3247    CreditRatingMaintenance,
3248    /// Acquisition/merger valuation pressure.
3249    AcquisitionValuation,
3250
3251    // Non-Financial Pressures
3252    /// Fear of job loss.
3253    JobSecurity,
3254    /// Pressure to maintain status or image.
3255    StatusMaintenance,
3256    /// Gambling addiction.
3257    GamblingAddiction,
3258    /// Substance abuse issues.
3259    SubstanceAbuse,
3260    /// Family pressure or obligations.
3261    FamilyPressure,
3262    /// Greed or desire for more.
3263    Greed,
3264}
3265
3266impl PressureType {
3267    /// Returns the risk weight (0.0-1.0) for this pressure type.
3268    pub fn risk_weight(&self) -> f64 {
3269        match self {
3270            PressureType::PersonalFinancialDifficulties => 0.80,
3271            PressureType::FinancialTargets => 0.75,
3272            PressureType::MarketExpectations => 0.70,
3273            PressureType::CovenantCompliance => 0.85,
3274            PressureType::CreditRatingMaintenance => 0.70,
3275            PressureType::AcquisitionValuation => 0.75,
3276            PressureType::JobSecurity => 0.65,
3277            PressureType::StatusMaintenance => 0.55,
3278            PressureType::GamblingAddiction => 0.90,
3279            PressureType::SubstanceAbuse => 0.85,
3280            PressureType::FamilyPressure => 0.60,
3281            PressureType::Greed => 0.70,
3282        }
3283    }
3284}
3285
3286/// Opportunity factors that enable fraud.
3287#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3288pub enum OpportunityFactor {
3289    /// Weak internal controls.
3290    WeakInternalControls,
3291    /// Lack of segregation of duties.
3292    LackOfSegregation,
3293    /// Override capability.
3294    ManagementOverride,
3295    /// Complex or unusual transactions.
3296    ComplexTransactions,
3297    /// Related party transactions.
3298    RelatedPartyTransactions,
3299    /// Poor tone at the top.
3300    PoorToneAtTop,
3301    /// Inadequate supervision.
3302    InadequateSupervision,
3303    /// Access to assets without accountability.
3304    AssetAccess,
3305    /// Inadequate record keeping.
3306    PoorRecordKeeping,
3307    /// Failure to discipline fraud perpetrators.
3308    LackOfDiscipline,
3309    /// Lack of independent checks.
3310    LackOfIndependentChecks,
3311}
3312
3313impl OpportunityFactor {
3314    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3315    pub fn risk_weight(&self) -> f64 {
3316        match self {
3317            OpportunityFactor::WeakInternalControls => 0.85,
3318            OpportunityFactor::LackOfSegregation => 0.80,
3319            OpportunityFactor::ManagementOverride => 0.90,
3320            OpportunityFactor::ComplexTransactions => 0.70,
3321            OpportunityFactor::RelatedPartyTransactions => 0.75,
3322            OpportunityFactor::PoorToneAtTop => 0.85,
3323            OpportunityFactor::InadequateSupervision => 0.75,
3324            OpportunityFactor::AssetAccess => 0.70,
3325            OpportunityFactor::PoorRecordKeeping => 0.65,
3326            OpportunityFactor::LackOfDiscipline => 0.60,
3327            OpportunityFactor::LackOfIndependentChecks => 0.75,
3328        }
3329    }
3330}
3331
3332/// Rationalizations used by fraud perpetrators.
3333#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3334pub enum Rationalization {
3335    /// "I'm just borrowing; I'll pay it back."
3336    TemporaryBorrowing,
3337    /// "Everyone does it."
3338    EveryoneDoesIt,
3339    /// "It's for the good of the company."
3340    ForTheCompanyGood,
3341    /// "I deserve this; the company owes me."
3342    Entitlement,
3343    /// "I was just following orders."
3344    FollowingOrders,
3345    /// "They won't miss it; they have plenty."
3346    TheyWontMissIt,
3347    /// "I need it more than they do."
3348    NeedItMore,
3349    /// "It's not really stealing."
3350    NotReallyStealing,
3351    /// "I'm underpaid for what I do."
3352    Underpaid,
3353    /// "It's a victimless crime."
3354    VictimlessCrime,
3355}
3356
3357impl Rationalization {
3358    /// Returns the risk weight (0.0-1.0) for this rationalization.
3359    pub fn risk_weight(&self) -> f64 {
3360        match self {
3361            // More dangerous rationalizations
3362            Rationalization::Entitlement => 0.85,
3363            Rationalization::EveryoneDoesIt => 0.80,
3364            Rationalization::NotReallyStealing => 0.80,
3365            Rationalization::TheyWontMissIt => 0.75,
3366            // Medium risk
3367            Rationalization::Underpaid => 0.70,
3368            Rationalization::ForTheCompanyGood => 0.65,
3369            Rationalization::NeedItMore => 0.65,
3370            // Lower risk (still indicates fraud)
3371            Rationalization::TemporaryBorrowing => 0.60,
3372            Rationalization::FollowingOrders => 0.55,
3373            Rationalization::VictimlessCrime => 0.60,
3374        }
3375    }
3376}
3377
3378// ============================================================================
3379// NEAR-MISS TYPES
3380// ============================================================================
3381
3382/// Type of near-miss pattern (suspicious but legitimate).
3383#[derive(Debug, Clone, Serialize, Deserialize)]
3384pub enum NearMissPattern {
3385    /// Transaction very similar to another (possible duplicate but legitimate).
3386    NearDuplicate {
3387        /// Date difference from similar transaction.
3388        date_difference_days: u32,
3389        /// Original transaction ID.
3390        similar_transaction_id: String,
3391    },
3392    /// Amount just below approval threshold (but legitimate).
3393    ThresholdProximity {
3394        /// The threshold being approached.
3395        threshold: Decimal,
3396        /// Percentage of threshold (0.0-1.0).
3397        proximity: f64,
3398    },
3399    /// Unusual but legitimate business pattern.
3400    UnusualLegitimate {
3401        /// Type of legitimate pattern.
3402        pattern_type: LegitimatePatternType,
3403        /// Business justification.
3404        justification: String,
3405    },
3406    /// Error that was caught and corrected.
3407    CorrectedError {
3408        /// Days until correction.
3409        correction_lag_days: u32,
3410        /// Correction document ID.
3411        correction_document_id: String,
3412    },
3413}
3414
3415/// Types of unusual but legitimate business patterns.
3416#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3417pub enum LegitimatePatternType {
3418    /// Year-end bonus payment.
3419    YearEndBonus,
3420    /// Contract prepayment.
3421    ContractPrepayment,
3422    /// Settlement payment.
3423    SettlementPayment,
3424    /// Insurance claim.
3425    InsuranceClaim,
3426    /// One-time vendor payment.
3427    OneTimePayment,
3428    /// Asset disposal.
3429    AssetDisposal,
3430    /// Seasonal inventory buildup.
3431    SeasonalInventory,
3432    /// Promotional spending.
3433    PromotionalSpending,
3434}
3435
3436impl LegitimatePatternType {
3437    /// Returns a description of this pattern type.
3438    pub fn description(&self) -> &'static str {
3439        match self {
3440            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3441            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3442            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3443            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3444            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3445            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3446            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3447            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3448        }
3449    }
3450}
3451
3452/// What might trigger a false positive for this near-miss.
3453#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3454pub enum FalsePositiveTrigger {
3455    /// Amount is near threshold.
3456    AmountNearThreshold,
3457    /// Timing is unusual.
3458    UnusualTiming,
3459    /// Similar to existing transaction.
3460    SimilarTransaction,
3461    /// New counterparty.
3462    NewCounterparty,
3463    /// Account combination unusual.
3464    UnusualAccountCombination,
3465    /// Volume spike.
3466    VolumeSpike,
3467    /// Round amount.
3468    RoundAmount,
3469}
3470
3471/// Label for a near-miss case.
3472#[derive(Debug, Clone, Serialize, Deserialize)]
3473pub struct NearMissLabel {
3474    /// Document ID.
3475    pub document_id: String,
3476    /// The near-miss pattern.
3477    pub pattern: NearMissPattern,
3478    /// How suspicious it appears (0.0-1.0).
3479    pub suspicion_score: f64,
3480    /// What would trigger a false positive.
3481    pub false_positive_trigger: FalsePositiveTrigger,
3482    /// Why this is actually legitimate.
3483    pub explanation: String,
3484}
3485
3486impl NearMissLabel {
3487    /// Creates a new near-miss label.
3488    pub fn new(
3489        document_id: impl Into<String>,
3490        pattern: NearMissPattern,
3491        suspicion_score: f64,
3492        trigger: FalsePositiveTrigger,
3493        explanation: impl Into<String>,
3494    ) -> Self {
3495        Self {
3496            document_id: document_id.into(),
3497            pattern,
3498            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3499            false_positive_trigger: trigger,
3500            explanation: explanation.into(),
3501        }
3502    }
3503}
3504
3505/// Configuration for anomaly rates.
3506#[derive(Debug, Clone, Serialize, Deserialize)]
3507pub struct AnomalyRateConfig {
3508    /// Overall anomaly rate (0.0 - 1.0).
3509    pub total_rate: f64,
3510    /// Fraud rate as proportion of anomalies.
3511    pub fraud_rate: f64,
3512    /// Error rate as proportion of anomalies.
3513    pub error_rate: f64,
3514    /// Process issue rate as proportion of anomalies.
3515    pub process_issue_rate: f64,
3516    /// Statistical anomaly rate as proportion of anomalies.
3517    pub statistical_rate: f64,
3518    /// Relational anomaly rate as proportion of anomalies.
3519    pub relational_rate: f64,
3520}
3521
3522impl Default for AnomalyRateConfig {
3523    fn default() -> Self {
3524        Self {
3525            total_rate: 0.02,         // 2% of transactions are anomalous
3526            fraud_rate: 0.25,         // 25% of anomalies are fraud
3527            error_rate: 0.35,         // 35% of anomalies are errors
3528            process_issue_rate: 0.20, // 20% are process issues
3529            statistical_rate: 0.15,   // 15% are statistical
3530            relational_rate: 0.05,    // 5% are relational
3531        }
3532    }
3533}
3534
3535impl AnomalyRateConfig {
3536    /// Validates that rates sum to approximately 1.0.
3537    pub fn validate(&self) -> Result<(), String> {
3538        let sum = self.fraud_rate
3539            + self.error_rate
3540            + self.process_issue_rate
3541            + self.statistical_rate
3542            + self.relational_rate;
3543
3544        if (sum - 1.0).abs() > 0.01 {
3545            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3546        }
3547
3548        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3549            return Err(format!(
3550                "Total rate must be between 0.0 and 1.0, got {}",
3551                self.total_rate
3552            ));
3553        }
3554
3555        Ok(())
3556    }
3557}
3558
3559#[cfg(test)]
3560mod tests {
3561    use super::*;
3562    use rust_decimal_macros::dec;
3563
3564    #[test]
3565    fn observability_class_maps_families_to_arms() {
3566        use ObservabilityClass::*;
3567        // Relational/graph families always surface in the account-flow graph.
3568        assert_eq!(
3569            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction)
3570                .observability_class(),
3571            RelationalGraph
3572        );
3573        assert_eq!(
3574            AnomalyType::Relational(RelationalAnomalyType::DormantAccountActivity)
3575                .observability_class(),
3576            RelationalGraph
3577        );
3578        // Multi-entity flow fraud → relational; recurrence fraud → memory; timing fraud → temporal.
3579        assert_eq!(
3580            AnomalyType::Fraud(FraudType::RoundTripping).observability_class(),
3581            RelationalGraph
3582        );
3583        assert_eq!(
3584            AnomalyType::Fraud(FraudType::DuplicatePayment).observability_class(),
3585            MemoryOnly
3586        );
3587        assert_eq!(
3588            AnomalyType::Fraud(FraudType::PrematureRevenue).observability_class(),
3589            Temporal
3590        );
3591        // Single-entry forensic signatures → per-JE density.
3592        assert_eq!(
3593            AnomalyType::Fraud(FraudType::RoundDollarManipulation).observability_class(),
3594            PerJeDensity
3595        );
3596        // Errors: period-timing → temporal, duplicate → memory, the rest → density.
3597        assert_eq!(
3598            AnomalyType::Error(ErrorType::WrongPeriod).observability_class(),
3599            Temporal
3600        );
3601        assert_eq!(
3602            AnomalyType::Error(ErrorType::DuplicateEntry).observability_class(),
3603            MemoryOnly
3604        );
3605        // Statistical: frequency-over-time → temporal, single-entry outlier → density.
3606        assert_eq!(
3607            AnomalyType::Statistical(StatisticalAnomalyType::TransactionBurst)
3608                .observability_class(),
3609            Temporal
3610        );
3611        assert_eq!(
3612            AnomalyType::Statistical(StatisticalAnomalyType::BenfordViolation)
3613                .observability_class(),
3614            PerJeDensity
3615        );
3616    }
3617
3618    #[test]
3619    fn labeled_anomaly_new_populates_observability_and_serializes_snake_case() {
3620        let a = LabeledAnomaly::new(
3621            "A1".to_string(),
3622            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction),
3623            "JE1".to_string(),
3624            "JE".to_string(),
3625            "1000".to_string(),
3626            NaiveDate::from_ymd_opt(2026, 1, 1).unwrap(),
3627        );
3628        // new() derives the field from the anomaly type.
3629        assert_eq!(a.observability, ObservabilityClass::RelationalGraph);
3630        // Serialized form is the stable snake_case label, and survives a round-trip.
3631        let json = serde_json::to_string(&a).expect("serialize");
3632        assert!(json.contains("\"observability\":\"relational_graph\""));
3633        assert_eq!(
3634            ObservabilityClass::RelationalGraph.as_str(),
3635            "relational_graph"
3636        );
3637        let back: LabeledAnomaly = serde_json::from_str(&json).expect("deserialize");
3638        assert_eq!(back.observability, ObservabilityClass::RelationalGraph);
3639        // Legacy labels without the field default to PerJeDensity rather than failing.
3640        let legacy = json.replacen(",\"observability\":\"relational_graph\"", "", 1);
3641        let parsed: LabeledAnomaly = serde_json::from_str(&legacy).expect("legacy deserialize");
3642        assert_eq!(parsed.observability, ObservabilityClass::PerJeDensity);
3643    }
3644
3645    #[test]
3646    fn test_anomaly_type_category() {
3647        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3648        assert_eq!(fraud.category(), "Fraud");
3649        assert!(fraud.is_intentional());
3650
3651        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3652        assert_eq!(error.category(), "Error");
3653        assert!(!error.is_intentional());
3654    }
3655
3656    #[test]
3657    fn test_labeled_anomaly() {
3658        let anomaly = LabeledAnomaly::new(
3659            "ANO001".to_string(),
3660            AnomalyType::Fraud(FraudType::SelfApproval),
3661            "JE001".to_string(),
3662            "JE".to_string(),
3663            "1000".to_string(),
3664            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3665        )
3666        .with_description("User approved their own expense report")
3667        .with_related_entity("USER001");
3668
3669        assert_eq!(anomaly.severity, 3);
3670        assert!(anomaly.is_injected);
3671        assert_eq!(anomaly.related_entities.len(), 1);
3672    }
3673
3674    #[test]
3675    fn test_labeled_anomaly_with_provenance() {
3676        let anomaly = LabeledAnomaly::new(
3677            "ANO001".to_string(),
3678            AnomalyType::Fraud(FraudType::SelfApproval),
3679            "JE001".to_string(),
3680            "JE".to_string(),
3681            "1000".to_string(),
3682            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3683        )
3684        .with_run_id("run-123")
3685        .with_generation_seed(42)
3686        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3687        .with_structured_strategy(InjectionStrategy::SelfApproval {
3688            user_id: "USER001".to_string(),
3689        })
3690        .with_scenario("scenario-001")
3691        .with_original_document_hash("abc123");
3692
3693        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3694        assert_eq!(anomaly.generation_seed, Some(42));
3695        assert!(anomaly.causal_reason.is_some());
3696        assert!(anomaly.structured_strategy.is_some());
3697        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3698        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3699
3700        // Check that legacy injection_strategy is also set
3701        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3702    }
3703
3704    #[test]
3705    fn test_labeled_anomaly_derivation_chain() {
3706        let parent = LabeledAnomaly::new(
3707            "ANO001".to_string(),
3708            AnomalyType::Fraud(FraudType::DuplicatePayment),
3709            "JE001".to_string(),
3710            "JE".to_string(),
3711            "1000".to_string(),
3712            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3713        );
3714
3715        let child = LabeledAnomaly::new(
3716            "ANO002".to_string(),
3717            AnomalyType::Error(ErrorType::DuplicateEntry),
3718            "JE002".to_string(),
3719            "JE".to_string(),
3720            "1000".to_string(),
3721            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3722        )
3723        .with_parent_anomaly(&parent.anomaly_id);
3724
3725        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3726    }
3727
3728    #[test]
3729    fn test_injection_strategy_description() {
3730        let strategy = InjectionStrategy::AmountManipulation {
3731            original: dec!(1000),
3732            factor: 2.5,
3733        };
3734        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3735        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3736
3737        let strategy = InjectionStrategy::ThresholdAvoidance {
3738            threshold: dec!(10000),
3739            adjusted_amount: dec!(9999),
3740        };
3741        assert_eq!(
3742            strategy.description(),
3743            "Amount adjusted to avoid 10000 threshold"
3744        );
3745
3746        let strategy = InjectionStrategy::DateShift {
3747            days_shifted: -5,
3748            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3749        };
3750        assert_eq!(strategy.description(), "Date backdated by 5 days");
3751
3752        let strategy = InjectionStrategy::DateShift {
3753            days_shifted: 3,
3754            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3755        };
3756        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3757    }
3758
3759    #[test]
3760    fn test_causal_reason_variants() {
3761        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3762        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3763            assert!((base_rate - 0.02).abs() < 0.001);
3764        }
3765
3766        let reason = AnomalyCausalReason::TemporalPattern {
3767            pattern_name: "year_end_spike".to_string(),
3768        };
3769        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3770            assert_eq!(pattern_name, "year_end_spike");
3771        }
3772
3773        let reason = AnomalyCausalReason::ScenarioStep {
3774            scenario_type: "kickback".to_string(),
3775            step_number: 3,
3776        };
3777        if let AnomalyCausalReason::ScenarioStep {
3778            scenario_type,
3779            step_number,
3780        } = reason
3781        {
3782            assert_eq!(scenario_type, "kickback");
3783            assert_eq!(step_number, 3);
3784        }
3785    }
3786
3787    #[test]
3788    fn test_feature_vector_length() {
3789        let anomaly = LabeledAnomaly::new(
3790            "ANO001".to_string(),
3791            AnomalyType::Fraud(FraudType::SelfApproval),
3792            "JE001".to_string(),
3793            "JE".to_string(),
3794            "1000".to_string(),
3795            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3796        );
3797
3798        let features = anomaly.to_features();
3799        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3800        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3801    }
3802
3803    #[test]
3804    fn test_feature_vector_with_provenance() {
3805        let anomaly = LabeledAnomaly::new(
3806            "ANO001".to_string(),
3807            AnomalyType::Fraud(FraudType::SelfApproval),
3808            "JE001".to_string(),
3809            "JE".to_string(),
3810            "1000".to_string(),
3811            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3812        )
3813        .with_scenario("scenario-001")
3814        .with_parent_anomaly("ANO000");
3815
3816        let features = anomaly.to_features();
3817
3818        // Last two features should be 1.0 (has scenario, has parent)
3819        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3820        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3821    }
3822
3823    #[test]
3824    fn test_anomaly_summary() {
3825        let anomalies = vec![
3826            LabeledAnomaly::new(
3827                "ANO001".to_string(),
3828                AnomalyType::Fraud(FraudType::SelfApproval),
3829                "JE001".to_string(),
3830                "JE".to_string(),
3831                "1000".to_string(),
3832                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3833            ),
3834            LabeledAnomaly::new(
3835                "ANO002".to_string(),
3836                AnomalyType::Error(ErrorType::DuplicateEntry),
3837                "JE002".to_string(),
3838                "JE".to_string(),
3839                "1000".to_string(),
3840                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3841            ),
3842        ];
3843
3844        let summary = AnomalySummary::from_anomalies(&anomalies);
3845
3846        assert_eq!(summary.total_count, 2);
3847        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3848        assert_eq!(summary.by_category.get("Error"), Some(&1));
3849    }
3850
3851    #[test]
3852    fn test_rate_config_validation() {
3853        let config = AnomalyRateConfig::default();
3854        assert!(config.validate().is_ok());
3855
3856        let bad_config = AnomalyRateConfig {
3857            fraud_rate: 0.5,
3858            error_rate: 0.5,
3859            process_issue_rate: 0.5, // Sum > 1.0
3860            ..Default::default()
3861        };
3862        assert!(bad_config.validate().is_err());
3863    }
3864
3865    #[test]
3866    fn test_injection_strategy_serialization() {
3867        let strategy = InjectionStrategy::SoDViolation {
3868            duty1: "CreatePO".to_string(),
3869            duty2: "ApprovePO".to_string(),
3870            violating_user: "USER001".to_string(),
3871        };
3872
3873        let json = serde_json::to_string(&strategy).unwrap();
3874        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3875
3876        assert_eq!(strategy, deserialized);
3877    }
3878
3879    #[test]
3880    fn test_labeled_anomaly_serialization_with_provenance() {
3881        let anomaly = LabeledAnomaly::new(
3882            "ANO001".to_string(),
3883            AnomalyType::Fraud(FraudType::SelfApproval),
3884            "JE001".to_string(),
3885            "JE".to_string(),
3886            "1000".to_string(),
3887            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3888        )
3889        .with_run_id("run-123")
3890        .with_generation_seed(42)
3891        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3892
3893        let json = serde_json::to_string(&anomaly).unwrap();
3894        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3895
3896        assert_eq!(anomaly.run_id, deserialized.run_id);
3897        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3898    }
3899
3900    // ========================================
3901    // FR-003 ENHANCED TAXONOMY TESTS
3902    // ========================================
3903
3904    #[test]
3905    fn test_anomaly_category_from_anomaly_type() {
3906        // Fraud mappings
3907        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3908        assert_eq!(
3909            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3910            AnomalyCategory::FictitiousVendor
3911        );
3912
3913        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3914        assert_eq!(
3915            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3916            AnomalyCategory::VendorKickback
3917        );
3918
3919        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3920        assert_eq!(
3921            AnomalyCategory::from_anomaly_type(&fraud_structured),
3922            AnomalyCategory::StructuredTransaction
3923        );
3924
3925        // Error mappings
3926        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3927        assert_eq!(
3928            AnomalyCategory::from_anomaly_type(&error_duplicate),
3929            AnomalyCategory::DuplicatePayment
3930        );
3931
3932        // Process issue mappings
3933        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3934        assert_eq!(
3935            AnomalyCategory::from_anomaly_type(&process_skip),
3936            AnomalyCategory::MissingApproval
3937        );
3938
3939        // Relational mappings
3940        let relational_circular =
3941            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3942        assert_eq!(
3943            AnomalyCategory::from_anomaly_type(&relational_circular),
3944            AnomalyCategory::CircularFlow
3945        );
3946    }
3947
3948    #[test]
3949    fn test_anomaly_category_ordinal() {
3950        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3951        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3952        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3953    }
3954
3955    #[test]
3956    fn test_contributing_factor() {
3957        let factor = ContributingFactor::new(
3958            FactorType::AmountDeviation,
3959            15000.0,
3960            10000.0,
3961            true,
3962            0.5,
3963            "Amount exceeds threshold",
3964        );
3965
3966        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3967        assert_eq!(factor.value, 15000.0);
3968        assert_eq!(factor.threshold, 10000.0);
3969        assert!(factor.direction_greater);
3970
3971        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3972        let contribution = factor.contribution();
3973        assert!((contribution - 0.25).abs() < 0.01);
3974    }
3975
3976    #[test]
3977    fn test_contributing_factor_with_evidence() {
3978        let mut data = HashMap::new();
3979        data.insert("expected".to_string(), "10000".to_string());
3980        data.insert("actual".to_string(), "15000".to_string());
3981
3982        let factor = ContributingFactor::new(
3983            FactorType::AmountDeviation,
3984            15000.0,
3985            10000.0,
3986            true,
3987            0.5,
3988            "Amount deviation detected",
3989        )
3990        .with_evidence("transaction_history", data);
3991
3992        assert!(factor.evidence.is_some());
3993        let evidence = factor.evidence.unwrap();
3994        assert_eq!(evidence.source, "transaction_history");
3995        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3996    }
3997
3998    #[test]
3999    fn test_enhanced_anomaly_label() {
4000        let base = LabeledAnomaly::new(
4001            "ANO001".to_string(),
4002            AnomalyType::Fraud(FraudType::DuplicatePayment),
4003            "JE001".to_string(),
4004            "JE".to_string(),
4005            "1000".to_string(),
4006            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
4007        );
4008
4009        let enhanced = EnhancedAnomalyLabel::from_base(base)
4010            .with_confidence(0.85)
4011            .with_severity(0.7)
4012            .with_factor(ContributingFactor::new(
4013                FactorType::DuplicateIndicator,
4014                1.0,
4015                0.5,
4016                true,
4017                0.4,
4018                "Duplicate payment detected",
4019            ))
4020            .with_secondary_category(AnomalyCategory::StructuredTransaction);
4021
4022        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
4023        assert_eq!(enhanced.enhanced_confidence, 0.85);
4024        assert_eq!(enhanced.enhanced_severity, 0.7);
4025        assert_eq!(enhanced.contributing_factors.len(), 1);
4026        assert_eq!(enhanced.secondary_categories.len(), 1);
4027    }
4028
4029    #[test]
4030    fn test_enhanced_anomaly_label_features() {
4031        let base = LabeledAnomaly::new(
4032            "ANO001".to_string(),
4033            AnomalyType::Fraud(FraudType::SelfApproval),
4034            "JE001".to_string(),
4035            "JE".to_string(),
4036            "1000".to_string(),
4037            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
4038        );
4039
4040        let enhanced = EnhancedAnomalyLabel::from_base(base)
4041            .with_confidence(0.9)
4042            .with_severity(0.8)
4043            .with_factor(ContributingFactor::new(
4044                FactorType::ControlBypass,
4045                1.0,
4046                0.0,
4047                true,
4048                0.5,
4049                "Control bypass detected",
4050            ));
4051
4052        let features = enhanced.to_features();
4053
4054        // Should have 25 features (15 base + 10 enhanced)
4055        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
4056        assert_eq!(features.len(), 25);
4057
4058        // Check enhanced confidence is in features
4059        assert_eq!(features[15], 0.9); // enhanced_confidence
4060
4061        // Check has_control_bypass flag
4062        assert_eq!(features[21], 1.0); // has_control_bypass
4063    }
4064
4065    #[test]
4066    fn test_enhanced_anomaly_label_feature_names() {
4067        let names = EnhancedAnomalyLabel::feature_names();
4068        assert_eq!(names.len(), 25);
4069        assert!(names.contains(&"enhanced_confidence"));
4070        assert!(names.contains(&"enhanced_severity"));
4071        assert!(names.contains(&"has_control_bypass"));
4072    }
4073
4074    #[test]
4075    fn test_factor_type_names() {
4076        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
4077        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
4078        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
4079    }
4080
4081    #[test]
4082    fn test_anomaly_category_serialization() {
4083        let category = AnomalyCategory::CircularFlow;
4084        let json = serde_json::to_string(&category).unwrap();
4085        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
4086        assert_eq!(category, deserialized);
4087
4088        let custom = AnomalyCategory::Custom("custom_type".to_string());
4089        let json = serde_json::to_string(&custom).unwrap();
4090        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
4091        assert_eq!(custom, deserialized);
4092    }
4093
4094    #[test]
4095    fn test_enhanced_label_secondary_category_dedup() {
4096        let base = LabeledAnomaly::new(
4097            "ANO001".to_string(),
4098            AnomalyType::Fraud(FraudType::DuplicatePayment),
4099            "JE001".to_string(),
4100            "JE".to_string(),
4101            "1000".to_string(),
4102            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
4103        );
4104
4105        let enhanced = EnhancedAnomalyLabel::from_base(base)
4106            // Try to add the primary category as secondary (should be ignored)
4107            .with_secondary_category(AnomalyCategory::DuplicatePayment)
4108            // Add a valid secondary
4109            .with_secondary_category(AnomalyCategory::TimingAnomaly)
4110            // Try to add duplicate secondary (should be ignored)
4111            .with_secondary_category(AnomalyCategory::TimingAnomaly);
4112
4113        // Should only have 1 secondary category (TimingAnomaly)
4114        assert_eq!(enhanced.secondary_categories.len(), 1);
4115        assert_eq!(
4116            enhanced.secondary_categories[0],
4117            AnomalyCategory::TimingAnomaly
4118        );
4119    }
4120
4121    // ==========================================================================
4122    // Accounting Standards Fraud Type Tests
4123    // ==========================================================================
4124
4125    #[test]
4126    fn test_revenue_recognition_fraud_types() {
4127        // Test ASC 606/IFRS 15 related fraud types
4128        let fraud_types = [
4129            FraudType::ImproperRevenueRecognition,
4130            FraudType::ImproperPoAllocation,
4131            FraudType::VariableConsiderationManipulation,
4132            FraudType::ContractModificationMisstatement,
4133        ];
4134
4135        for fraud_type in fraud_types {
4136            let anomaly_type = AnomalyType::Fraud(fraud_type);
4137            assert_eq!(anomaly_type.category(), "Fraud");
4138            assert!(anomaly_type.is_intentional());
4139            assert!(anomaly_type.severity() >= 3);
4140        }
4141    }
4142
4143    #[test]
4144    fn test_lease_accounting_fraud_types() {
4145        // Test ASC 842/IFRS 16 related fraud types
4146        let fraud_types = [
4147            FraudType::LeaseClassificationManipulation,
4148            FraudType::OffBalanceSheetLease,
4149            FraudType::LeaseLiabilityUnderstatement,
4150            FraudType::RouAssetMisstatement,
4151        ];
4152
4153        for fraud_type in fraud_types {
4154            let anomaly_type = AnomalyType::Fraud(fraud_type);
4155            assert_eq!(anomaly_type.category(), "Fraud");
4156            assert!(anomaly_type.is_intentional());
4157            assert!(anomaly_type.severity() >= 3);
4158        }
4159
4160        // Off-balance sheet lease fraud should be high severity
4161        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
4162    }
4163
4164    #[test]
4165    fn test_fair_value_fraud_types() {
4166        // Test ASC 820/IFRS 13 related fraud types
4167        let fraud_types = [
4168            FraudType::FairValueHierarchyManipulation,
4169            FraudType::Level3InputManipulation,
4170            FraudType::ValuationTechniqueManipulation,
4171        ];
4172
4173        for fraud_type in fraud_types {
4174            let anomaly_type = AnomalyType::Fraud(fraud_type);
4175            assert_eq!(anomaly_type.category(), "Fraud");
4176            assert!(anomaly_type.is_intentional());
4177            assert!(anomaly_type.severity() >= 4);
4178        }
4179
4180        // Level 3 manipulation is highest severity (unobservable inputs)
4181        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
4182    }
4183
4184    #[test]
4185    fn test_impairment_fraud_types() {
4186        // Test ASC 360/IAS 36 related fraud types
4187        let fraud_types = [
4188            FraudType::DelayedImpairment,
4189            FraudType::ImpairmentTestAvoidance,
4190            FraudType::CashFlowProjectionManipulation,
4191            FraudType::ImproperImpairmentReversal,
4192        ];
4193
4194        for fraud_type in fraud_types {
4195            let anomaly_type = AnomalyType::Fraud(fraud_type);
4196            assert_eq!(anomaly_type.category(), "Fraud");
4197            assert!(anomaly_type.is_intentional());
4198            assert!(anomaly_type.severity() >= 3);
4199        }
4200
4201        // Cash flow manipulation has highest severity
4202        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
4203    }
4204
4205    // ==========================================================================
4206    // Accounting Standards Error Type Tests
4207    // ==========================================================================
4208
4209    #[test]
4210    fn test_standards_error_types() {
4211        // Test non-fraudulent accounting standards errors
4212        let error_types = [
4213            ErrorType::RevenueTimingError,
4214            ErrorType::PoAllocationError,
4215            ErrorType::LeaseClassificationError,
4216            ErrorType::LeaseCalculationError,
4217            ErrorType::FairValueError,
4218            ErrorType::ImpairmentCalculationError,
4219            ErrorType::DiscountRateError,
4220            ErrorType::FrameworkApplicationError,
4221        ];
4222
4223        for error_type in error_types {
4224            let anomaly_type = AnomalyType::Error(error_type);
4225            assert_eq!(anomaly_type.category(), "Error");
4226            assert!(!anomaly_type.is_intentional());
4227            assert!(anomaly_type.severity() >= 3);
4228        }
4229    }
4230
4231    #[test]
4232    fn test_framework_application_error() {
4233        // Test IFRS vs GAAP confusion errors
4234        let error_type = ErrorType::FrameworkApplicationError;
4235        assert_eq!(error_type.severity(), 4);
4236
4237        let anomaly = LabeledAnomaly::new(
4238            "ERR001".to_string(),
4239            AnomalyType::Error(error_type),
4240            "JE100".to_string(),
4241            "JE".to_string(),
4242            "1000".to_string(),
4243            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
4244        )
4245        .with_description("LIFO inventory method used under IFRS (not permitted)")
4246        .with_metadata("framework", "IFRS")
4247        .with_metadata("standard_violated", "IAS 2");
4248
4249        assert_eq!(anomaly.anomaly_type.category(), "Error");
4250        assert_eq!(
4251            anomaly.metadata.get("standard_violated"),
4252            Some(&"IAS 2".to_string())
4253        );
4254    }
4255
4256    #[test]
4257    fn test_standards_anomaly_serialization() {
4258        // Test that new fraud types serialize/deserialize correctly
4259        let fraud_types = [
4260            FraudType::ImproperRevenueRecognition,
4261            FraudType::LeaseClassificationManipulation,
4262            FraudType::FairValueHierarchyManipulation,
4263            FraudType::DelayedImpairment,
4264        ];
4265
4266        for fraud_type in fraud_types {
4267            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4268            let deserialized: FraudType =
4269                serde_json::from_str(&json).expect("Failed to deserialize");
4270            assert_eq!(fraud_type, deserialized);
4271        }
4272
4273        // Test error types
4274        let error_types = [
4275            ErrorType::RevenueTimingError,
4276            ErrorType::LeaseCalculationError,
4277            ErrorType::FairValueError,
4278            ErrorType::FrameworkApplicationError,
4279        ];
4280
4281        for error_type in error_types {
4282            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4283            let deserialized: ErrorType =
4284                serde_json::from_str(&json).expect("Failed to deserialize");
4285            assert_eq!(error_type, deserialized);
4286        }
4287    }
4288
4289    #[test]
4290    fn test_standards_labeled_anomaly() {
4291        // Test creating a labeled anomaly for a standards violation
4292        let anomaly = LabeledAnomaly::new(
4293            "STD001".to_string(),
4294            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4295            "CONTRACT-2024-001".to_string(),
4296            "Revenue".to_string(),
4297            "1000".to_string(),
4298            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4299        )
4300        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4301        .with_monetary_impact(dec!(500000))
4302        .with_metadata("standard", "ASC 606")
4303        .with_metadata("paragraph", "606-10-25-1")
4304        .with_metadata("contract_id", "C-2024-001")
4305        .with_related_entity("CONTRACT-2024-001")
4306        .with_related_entity("CUSTOMER-500");
4307
4308        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4309        assert!(anomaly.is_injected);
4310        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4311        assert_eq!(anomaly.related_entities.len(), 2);
4312        assert_eq!(
4313            anomaly.metadata.get("standard"),
4314            Some(&"ASC 606".to_string())
4315        );
4316    }
4317
4318    // ==========================================================================
4319    // Multi-Dimensional Labeling Tests
4320    // ==========================================================================
4321
4322    #[test]
4323    fn test_severity_level() {
4324        assert_eq!(SeverityLevel::Low.numeric(), 1);
4325        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4326
4327        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4328        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4329
4330        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4331        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4332
4333        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4334    }
4335
4336    #[test]
4337    fn test_anomaly_severity() {
4338        let severity =
4339            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4340
4341        assert_eq!(severity.level, SeverityLevel::High);
4342        assert!(severity.is_material);
4343        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4344
4345        // Not material
4346        let low_severity =
4347            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4348        assert!(!low_severity.is_material);
4349    }
4350
4351    #[test]
4352    fn test_detection_difficulty() {
4353        assert!(
4354            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4355        );
4356        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4357
4358        assert_eq!(
4359            AnomalyDetectionDifficulty::from_score(0.05),
4360            AnomalyDetectionDifficulty::Trivial
4361        );
4362        assert_eq!(
4363            AnomalyDetectionDifficulty::from_score(0.90),
4364            AnomalyDetectionDifficulty::Expert
4365        );
4366
4367        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4368    }
4369
4370    #[test]
4371    fn test_ground_truth_certainty() {
4372        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4373        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4374        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4375    }
4376
4377    #[test]
4378    fn test_detection_method() {
4379        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4380        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4381    }
4382
4383    #[test]
4384    fn test_extended_anomaly_label() {
4385        let base = LabeledAnomaly::new(
4386            "ANO001".to_string(),
4387            AnomalyType::Fraud(FraudType::FictitiousVendor),
4388            "JE001".to_string(),
4389            "JE".to_string(),
4390            "1000".to_string(),
4391            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4392        )
4393        .with_monetary_impact(dec!(100000));
4394
4395        let extended = ExtendedAnomalyLabel::from_base(base)
4396            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4397            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4398            .with_method(DetectionMethod::GraphBased)
4399            .with_method(DetectionMethod::ForensicAudit)
4400            .with_indicator("New vendor with no history")
4401            .with_indicator("Large first transaction")
4402            .with_certainty(GroundTruthCertainty::Definite)
4403            .with_entity("V001")
4404            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4405            .with_scheme("SCHEME001", 2);
4406
4407        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4408        assert_eq!(
4409            extended.detection_difficulty,
4410            AnomalyDetectionDifficulty::Hard
4411        );
4412        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4413        assert_eq!(extended.recommended_methods.len(), 3);
4414        assert_eq!(extended.key_indicators.len(), 2);
4415        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4416        assert_eq!(extended.scheme_stage, Some(2));
4417    }
4418
4419    #[test]
4420    fn test_extended_anomaly_label_features() {
4421        let base = LabeledAnomaly::new(
4422            "ANO001".to_string(),
4423            AnomalyType::Fraud(FraudType::SelfApproval),
4424            "JE001".to_string(),
4425            "JE".to_string(),
4426            "1000".to_string(),
4427            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4428        );
4429
4430        let extended =
4431            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4432
4433        let features = extended.to_features();
4434        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4435        assert_eq!(features.len(), 30);
4436
4437        // Check difficulty score is in features
4438        let difficulty_idx = 18; // Position of difficulty_score
4439        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4440    }
4441
4442    #[test]
4443    fn test_extended_label_near_miss() {
4444        let base = LabeledAnomaly::new(
4445            "ANO001".to_string(),
4446            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4447            "JE001".to_string(),
4448            "JE".to_string(),
4449            "1000".to_string(),
4450            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4451        );
4452
4453        let extended = ExtendedAnomalyLabel::from_base(base)
4454            .as_near_miss("Year-end bonus payment, legitimately high");
4455
4456        assert!(extended.is_near_miss);
4457        assert!(extended.near_miss_explanation.is_some());
4458    }
4459
4460    #[test]
4461    fn test_scheme_type() {
4462        assert_eq!(
4463            SchemeType::GradualEmbezzlement.name(),
4464            "gradual_embezzlement"
4465        );
4466        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4467        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4468    }
4469
4470    #[test]
4471    fn test_concealment_technique() {
4472        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4473        assert!(
4474            ConcealmentTechnique::Collusion.difficulty_bonus()
4475                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4476        );
4477    }
4478
4479    #[test]
4480    fn test_near_miss_label() {
4481        let near_miss = NearMissLabel::new(
4482            "JE001",
4483            NearMissPattern::ThresholdProximity {
4484                threshold: dec!(10000),
4485                proximity: 0.95,
4486            },
4487            0.7,
4488            FalsePositiveTrigger::AmountNearThreshold,
4489            "Transaction is 95% of threshold but business justified",
4490        );
4491
4492        assert_eq!(near_miss.document_id, "JE001");
4493        assert_eq!(near_miss.suspicion_score, 0.7);
4494        assert_eq!(
4495            near_miss.false_positive_trigger,
4496            FalsePositiveTrigger::AmountNearThreshold
4497        );
4498    }
4499
4500    #[test]
4501    fn test_legitimate_pattern_type() {
4502        assert_eq!(
4503            LegitimatePatternType::YearEndBonus.description(),
4504            "Year-end bonus payment"
4505        );
4506        assert_eq!(
4507            LegitimatePatternType::InsuranceClaim.description(),
4508            "Insurance claim reimbursement"
4509        );
4510    }
4511
4512    #[test]
4513    fn test_severity_detection_difficulty_serialization() {
4514        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4515        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4516        let deserialized: AnomalySeverity =
4517            serde_json::from_str(&json).expect("Failed to deserialize");
4518        assert_eq!(severity.level, deserialized.level);
4519
4520        let difficulty = AnomalyDetectionDifficulty::Hard;
4521        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4522        let deserialized: AnomalyDetectionDifficulty =
4523            serde_json::from_str(&json).expect("Failed to deserialize");
4524        assert_eq!(difficulty, deserialized);
4525    }
4526
4527    // ========================================
4528    // ACFE Taxonomy Tests
4529    // ========================================
4530
4531    #[test]
4532    fn test_acfe_fraud_category() {
4533        let asset = AcfeFraudCategory::AssetMisappropriation;
4534        assert_eq!(asset.name(), "asset_misappropriation");
4535        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4536        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4537        assert_eq!(asset.typical_detection_months(), 12);
4538
4539        let corruption = AcfeFraudCategory::Corruption;
4540        assert_eq!(corruption.name(), "corruption");
4541        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4542
4543        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4544        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4545        assert_eq!(fs_fraud.typical_detection_months(), 24);
4546    }
4547
4548    #[test]
4549    fn test_cash_fraud_scheme() {
4550        let shell = CashFraudScheme::ShellCompany;
4551        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4552        assert_eq!(shell.subcategory(), "billing_schemes");
4553        assert_eq!(shell.severity(), 5);
4554        assert_eq!(
4555            shell.detection_difficulty(),
4556            AnomalyDetectionDifficulty::Hard
4557        );
4558
4559        let ghost = CashFraudScheme::GhostEmployee;
4560        assert_eq!(ghost.subcategory(), "payroll_schemes");
4561        assert_eq!(ghost.severity(), 5);
4562
4563        // Test all variants exist
4564        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4565    }
4566
4567    #[test]
4568    fn test_asset_fraud_scheme() {
4569        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4570        assert_eq!(
4571            ip_theft.category(),
4572            AcfeFraudCategory::AssetMisappropriation
4573        );
4574        assert_eq!(ip_theft.subcategory(), "other_assets");
4575        assert_eq!(ip_theft.severity(), 5);
4576
4577        let inv_theft = AssetFraudScheme::InventoryTheft;
4578        assert_eq!(inv_theft.subcategory(), "inventory");
4579        assert_eq!(inv_theft.severity(), 4);
4580    }
4581
4582    #[test]
4583    fn test_corruption_scheme() {
4584        let kickback = CorruptionScheme::InvoiceKickback;
4585        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4586        assert_eq!(kickback.subcategory(), "bribery");
4587        assert_eq!(kickback.severity(), 5);
4588        assert_eq!(
4589            kickback.detection_difficulty(),
4590            AnomalyDetectionDifficulty::Expert
4591        );
4592
4593        let bid_rigging = CorruptionScheme::BidRigging;
4594        assert_eq!(bid_rigging.subcategory(), "bribery");
4595        assert_eq!(
4596            bid_rigging.detection_difficulty(),
4597            AnomalyDetectionDifficulty::Hard
4598        );
4599
4600        let purchasing = CorruptionScheme::PurchasingConflict;
4601        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4602
4603        // Test all variants exist
4604        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4605    }
4606
4607    #[test]
4608    fn test_financial_statement_scheme() {
4609        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4610        assert_eq!(
4611            fictitious.category(),
4612            AcfeFraudCategory::FinancialStatementFraud
4613        );
4614        assert_eq!(fictitious.subcategory(), "overstatement");
4615        assert_eq!(fictitious.severity(), 5);
4616        assert_eq!(
4617            fictitious.detection_difficulty(),
4618            AnomalyDetectionDifficulty::Expert
4619        );
4620
4621        let understated = FinancialStatementScheme::UnderstatedRevenues;
4622        assert_eq!(understated.subcategory(), "understatement");
4623
4624        // Test all variants exist
4625        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4626    }
4627
4628    #[test]
4629    fn test_acfe_scheme_unified() {
4630        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4631        assert_eq!(
4632            cash_scheme.category(),
4633            AcfeFraudCategory::AssetMisappropriation
4634        );
4635        assert_eq!(cash_scheme.severity(), 5);
4636
4637        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4638        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4639
4640        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4641        assert_eq!(
4642            fs_scheme.category(),
4643            AcfeFraudCategory::FinancialStatementFraud
4644        );
4645    }
4646
4647    #[test]
4648    fn test_acfe_detection_method() {
4649        let tip = AcfeDetectionMethod::Tip;
4650        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4651
4652        let internal_audit = AcfeDetectionMethod::InternalAudit;
4653        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4654
4655        let external_audit = AcfeDetectionMethod::ExternalAudit;
4656        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4657
4658        // Test all variants exist
4659        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4660    }
4661
4662    #[test]
4663    fn test_perpetrator_department() {
4664        let accounting = PerpetratorDepartment::Accounting;
4665        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4666        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4667
4668        let executive = PerpetratorDepartment::Executive;
4669        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4670    }
4671
4672    #[test]
4673    fn test_perpetrator_level() {
4674        let employee = PerpetratorLevel::Employee;
4675        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4676        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4677
4678        let exec = PerpetratorLevel::OwnerExecutive;
4679        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4680    }
4681
4682    #[test]
4683    fn test_acfe_calibration() {
4684        let cal = AcfeCalibration::default();
4685        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4686        assert_eq!(cal.median_duration_months, 12);
4687        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4688        assert!(cal.validate().is_ok());
4689
4690        // Test custom calibration
4691        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4692        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4693        assert_eq!(custom_cal.median_duration_months, 18);
4694
4695        // Test validation failure
4696        let bad_cal = AcfeCalibration {
4697            collusion_rate: 1.5,
4698            ..Default::default()
4699        };
4700        assert!(bad_cal.validate().is_err());
4701    }
4702
4703    #[test]
4704    fn test_fraud_triangle() {
4705        let triangle = FraudTriangle::new(
4706            PressureType::FinancialTargets,
4707            vec![
4708                OpportunityFactor::WeakInternalControls,
4709                OpportunityFactor::ManagementOverride,
4710            ],
4711            Rationalization::ForTheCompanyGood,
4712        );
4713
4714        // Risk score should be between 0 and 1
4715        let risk = triangle.risk_score();
4716        assert!((0.0..=1.0).contains(&risk));
4717        // Should be relatively high given the components
4718        assert!(risk > 0.5);
4719    }
4720
4721    #[test]
4722    fn test_pressure_types() {
4723        let financial = PressureType::FinancialTargets;
4724        assert!(financial.risk_weight() > 0.5);
4725
4726        let gambling = PressureType::GamblingAddiction;
4727        assert_eq!(gambling.risk_weight(), 0.90);
4728    }
4729
4730    #[test]
4731    fn test_opportunity_factors() {
4732        let override_factor = OpportunityFactor::ManagementOverride;
4733        assert_eq!(override_factor.risk_weight(), 0.90);
4734
4735        let weak_controls = OpportunityFactor::WeakInternalControls;
4736        assert!(weak_controls.risk_weight() > 0.8);
4737    }
4738
4739    #[test]
4740    fn test_rationalizations() {
4741        let entitlement = Rationalization::Entitlement;
4742        assert!(entitlement.risk_weight() > 0.8);
4743
4744        let borrowing = Rationalization::TemporaryBorrowing;
4745        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4746    }
4747
4748    #[test]
4749    fn test_acfe_scheme_serialization() {
4750        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4751        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4752        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4753        assert_eq!(scheme, deserialized);
4754
4755        let calibration = AcfeCalibration::default();
4756        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4757        let deserialized: AcfeCalibration =
4758            serde_json::from_str(&json).expect("Failed to deserialize");
4759        assert_eq!(calibration.median_loss, deserialized.median_loss);
4760    }
4761}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs