datasynth_core/models/
anomaly.rs

1//! Anomaly types and labels for synthetic data generation.
2//!
3//! This module provides comprehensive anomaly classification for:
4//! - Fraud detection training
5//! - Error detection systems
6//! - Process compliance monitoring
7//! - Statistical anomaly detection
8//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15/// Causal reason explaining why an anomaly was injected.
16///
17/// This enables provenance tracking for understanding the "why" behind each anomaly.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20    /// Injected due to random rate selection.
21    RandomRate {
22        /// Base rate used for selection.
23        base_rate: f64,
24    },
25    /// Injected due to temporal pattern matching.
26    TemporalPattern {
27        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
28        pattern_name: String,
29    },
30    /// Injected based on entity targeting rules.
31    EntityTargeting {
32        /// Type of entity targeted (e.g., "vendor", "user", "account").
33        target_type: String,
34        /// ID of the targeted entity.
35        target_id: String,
36    },
37    /// Part of an anomaly cluster.
38    ClusterMembership {
39        /// ID of the cluster this anomaly belongs to.
40        cluster_id: String,
41    },
42    /// Part of a multi-step scenario.
43    ScenarioStep {
44        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
45        scenario_type: String,
46        /// Step number within the scenario.
47        step_number: u32,
48    },
49    /// Injected based on data quality profile.
50    DataQualityProfile {
51        /// Profile name (e.g., "noisy", "legacy", "clean").
52        profile: String,
53    },
54    /// Injected for ML training balance.
55    MLTrainingBalance {
56        /// Target class being balanced.
57        target_class: String,
58    },
59}
60
61/// Structured injection strategy with captured parameters.
62///
63/// Unlike the string-based `injection_strategy` field, this enum captures
64/// the exact parameters used during injection for full reproducibility.
65#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67    /// Amount was manipulated by a factor.
68    AmountManipulation {
69        /// Original amount before manipulation.
70        original: Decimal,
71        /// Multiplication factor applied.
72        factor: f64,
73    },
74    /// Amount adjusted to avoid a threshold.
75    ThresholdAvoidance {
76        /// Threshold being avoided.
77        threshold: Decimal,
78        /// Final amount after adjustment.
79        adjusted_amount: Decimal,
80    },
81    /// Date was backdated or forward-dated.
82    DateShift {
83        /// Number of days shifted (negative = backdated).
84        days_shifted: i32,
85        /// Original date before shift.
86        original_date: NaiveDate,
87    },
88    /// User approved their own transaction.
89    SelfApproval {
90        /// User who created and approved.
91        user_id: String,
92    },
93    /// Segregation of duties violation.
94    SoDViolation {
95        /// First duty involved.
96        duty1: String,
97        /// Second duty involved.
98        duty2: String,
99        /// User who performed both duties.
100        violating_user: String,
101    },
102    /// Exact duplicate of another document.
103    ExactDuplicate {
104        /// ID of the original document.
105        original_doc_id: String,
106    },
107    /// Near-duplicate with small variations.
108    NearDuplicate {
109        /// ID of the original document.
110        original_doc_id: String,
111        /// Fields that were varied.
112        varied_fields: Vec<String>,
113    },
114    /// Circular flow of funds/goods.
115    CircularFlow {
116        /// Chain of entities involved.
117        entity_chain: Vec<String>,
118    },
119    /// Split transaction to avoid threshold.
120    SplitTransaction {
121        /// Original total amount.
122        original_amount: Decimal,
123        /// Number of splits.
124        split_count: u32,
125        /// IDs of the split documents.
126        split_doc_ids: Vec<String>,
127    },
128    /// Round number manipulation.
129    RoundNumbering {
130        /// Original precise amount.
131        original_amount: Decimal,
132        /// Rounded amount.
133        rounded_amount: Decimal,
134    },
135    /// Timing manipulation (weekend, after-hours, etc.).
136    TimingManipulation {
137        /// Type of timing issue.
138        timing_type: String,
139        /// Original timestamp.
140        original_time: Option<NaiveDateTime>,
141    },
142    /// Account misclassification.
143    AccountMisclassification {
144        /// Correct account.
145        correct_account: String,
146        /// Incorrect account used.
147        incorrect_account: String,
148    },
149    /// Missing required field.
150    MissingField {
151        /// Name of the missing field.
152        field_name: String,
153    },
154    /// Custom injection strategy.
155    Custom {
156        /// Strategy name.
157        name: String,
158        /// Additional parameters.
159        parameters: HashMap<String, String>,
160    },
161}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {:.2}", factor)
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {} threshold", threshold)
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {} days", days_shifted)
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {}", user_id)
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {} and {}", duty1, duty2)
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {}", original_doc_id)
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!(
194                    "Near-duplicate of {} (varied: {:?})",
195                    original_doc_id, varied_fields
196                )
197            }
198            InjectionStrategy::CircularFlow { entity_chain } => {
199                format!("Circular flow through {} entities", entity_chain.len())
200            }
201            InjectionStrategy::SplitTransaction { split_count, .. } => {
202                format!("Split into {} transactions", split_count)
203            }
204            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
205            InjectionStrategy::TimingManipulation { timing_type, .. } => {
206                format!("Timing manipulation: {}", timing_type)
207            }
208            InjectionStrategy::AccountMisclassification {
209                correct_account,
210                incorrect_account,
211            } => {
212                format!(
213                    "Misclassified from {} to {}",
214                    correct_account, incorrect_account
215                )
216            }
217            InjectionStrategy::MissingField { field_name } => {
218                format!("Missing required field: {}", field_name)
219            }
220            InjectionStrategy::Custom { name, .. } => format!("Custom: {}", name),
221        }
222    }
223
224    /// Returns the strategy type name.
225    pub fn strategy_type(&self) -> &'static str {
226        match self {
227            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
228            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
229            InjectionStrategy::DateShift { .. } => "DateShift",
230            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
231            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
232            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
233            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
234            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
235            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
236            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
237            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
238            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
239            InjectionStrategy::MissingField { .. } => "MissingField",
240            InjectionStrategy::Custom { .. } => "Custom",
241        }
242    }
243}
244
245/// Primary anomaly classification.
246#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
247pub enum AnomalyType {
248    /// Fraudulent activity.
249    Fraud(FraudType),
250    /// Data entry or processing error.
251    Error(ErrorType),
252    /// Process or control issue.
253    ProcessIssue(ProcessIssueType),
254    /// Statistical anomaly.
255    Statistical(StatisticalAnomalyType),
256    /// Relational/graph anomaly.
257    Relational(RelationalAnomalyType),
258    /// Custom anomaly type.
259    Custom(String),
260}
261
262impl AnomalyType {
263    /// Returns the category name.
264    pub fn category(&self) -> &'static str {
265        match self {
266            AnomalyType::Fraud(_) => "Fraud",
267            AnomalyType::Error(_) => "Error",
268            AnomalyType::ProcessIssue(_) => "ProcessIssue",
269            AnomalyType::Statistical(_) => "Statistical",
270            AnomalyType::Relational(_) => "Relational",
271            AnomalyType::Custom(_) => "Custom",
272        }
273    }
274
275    /// Returns the specific type name.
276    pub fn type_name(&self) -> String {
277        match self {
278            AnomalyType::Fraud(t) => format!("{:?}", t),
279            AnomalyType::Error(t) => format!("{:?}", t),
280            AnomalyType::ProcessIssue(t) => format!("{:?}", t),
281            AnomalyType::Statistical(t) => format!("{:?}", t),
282            AnomalyType::Relational(t) => format!("{:?}", t),
283            AnomalyType::Custom(s) => s.clone(),
284        }
285    }
286
287    /// Returns the severity level (1-5, 5 being most severe).
288    pub fn severity(&self) -> u8 {
289        match self {
290            AnomalyType::Fraud(t) => t.severity(),
291            AnomalyType::Error(t) => t.severity(),
292            AnomalyType::ProcessIssue(t) => t.severity(),
293            AnomalyType::Statistical(t) => t.severity(),
294            AnomalyType::Relational(t) => t.severity(),
295            AnomalyType::Custom(_) => 3,
296        }
297    }
298
299    /// Returns whether this anomaly is typically intentional.
300    pub fn is_intentional(&self) -> bool {
301        matches!(self, AnomalyType::Fraud(_))
302    }
303}
304
305/// Fraud types for detection training.
306#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
307pub enum FraudType {
308    // Journal Entry Fraud
309    /// Fictitious journal entry with no business purpose.
310    FictitiousEntry,
311    /// Fictitious transaction (alias for FictitiousEntry).
312    FictitiousTransaction,
313    /// Round-dollar amounts suggesting manual manipulation.
314    RoundDollarManipulation,
315    /// Entry posted just below approval threshold.
316    JustBelowThreshold,
317    /// Revenue recognition manipulation.
318    RevenueManipulation,
319    /// Expense capitalization fraud.
320    ImproperCapitalization,
321    /// Improperly capitalizing expenses as assets.
322    ExpenseCapitalization,
323    /// Cookie jar reserves manipulation.
324    ReserveManipulation,
325    /// Round-tripping funds through suspense/clearing accounts.
326    SuspenseAccountAbuse,
327    /// Splitting transactions to stay below approval thresholds.
328    SplitTransaction,
329    /// Unusual timing (weekend, holiday, after-hours postings).
330    TimingAnomaly,
331    /// Posting to unauthorized accounts.
332    UnauthorizedAccess,
333
334    // Approval Fraud
335    /// User approving their own request.
336    SelfApproval,
337    /// Approval beyond authorized limit.
338    ExceededApprovalLimit,
339    /// Segregation of duties violation.
340    SegregationOfDutiesViolation,
341    /// Approval by unauthorized user.
342    UnauthorizedApproval,
343    /// Collusion between approver and requester.
344    CollusiveApproval,
345
346    // Vendor/Payment Fraud
347    /// Fictitious vendor.
348    FictitiousVendor,
349    /// Duplicate payment to vendor.
350    DuplicatePayment,
351    /// Payment to shell company.
352    ShellCompanyPayment,
353    /// Kickback scheme.
354    Kickback,
355    /// Kickback scheme (alias).
356    KickbackScheme,
357    /// Invoice manipulation.
358    InvoiceManipulation,
359
360    // Asset Fraud
361    /// Misappropriation of assets.
362    AssetMisappropriation,
363    /// Inventory theft.
364    InventoryTheft,
365    /// Ghost employee.
366    GhostEmployee,
367
368    // Financial Statement Fraud
369    /// Premature revenue recognition.
370    PrematureRevenue,
371    /// Understated liabilities.
372    UnderstatedLiabilities,
373    /// Overstated assets.
374    OverstatedAssets,
375    /// Channel stuffing.
376    ChannelStuffing,
377
378    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
379    /// Improper revenue recognition timing (ASC 606/IFRS 15).
380    ImproperRevenueRecognition,
381    /// Multiple performance obligations not properly separated.
382    ImproperPoAllocation,
383    /// Variable consideration not properly estimated.
384    VariableConsiderationManipulation,
385    /// Contract modifications not properly accounted for.
386    ContractModificationMisstatement,
387
388    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
389    /// Lease classification manipulation (operating vs finance).
390    LeaseClassificationManipulation,
391    /// Off-balance sheet lease fraud.
392    OffBalanceSheetLease,
393    /// Lease liability understatement.
394    LeaseLiabilityUnderstatement,
395    /// ROU asset misstatement.
396    RouAssetMisstatement,
397
398    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
399    /// Fair value hierarchy misclassification.
400    FairValueHierarchyManipulation,
401    /// Level 3 input manipulation.
402    Level3InputManipulation,
403    /// Valuation technique manipulation.
404    ValuationTechniqueManipulation,
405
406    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
407    /// Delayed impairment recognition.
408    DelayedImpairment,
409    /// Improperly avoiding impairment testing.
410    ImpairmentTestAvoidance,
411    /// Cash flow projection manipulation for impairment.
412    CashFlowProjectionManipulation,
413    /// Improper impairment reversal (IFRS only).
414    ImproperImpairmentReversal,
415}
416
417impl FraudType {
418    /// Returns severity level (1-5).
419    pub fn severity(&self) -> u8 {
420        match self {
421            FraudType::RoundDollarManipulation => 2,
422            FraudType::JustBelowThreshold => 3,
423            FraudType::SelfApproval => 3,
424            FraudType::ExceededApprovalLimit => 3,
425            FraudType::DuplicatePayment => 3,
426            FraudType::FictitiousEntry => 4,
427            FraudType::RevenueManipulation => 5,
428            FraudType::FictitiousVendor => 5,
429            FraudType::ShellCompanyPayment => 5,
430            FraudType::AssetMisappropriation => 5,
431            FraudType::SegregationOfDutiesViolation => 4,
432            FraudType::CollusiveApproval => 5,
433            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
434            FraudType::ImproperRevenueRecognition => 5,
435            FraudType::ImproperPoAllocation => 4,
436            FraudType::VariableConsiderationManipulation => 4,
437            FraudType::ContractModificationMisstatement => 3,
438            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
439            FraudType::LeaseClassificationManipulation => 4,
440            FraudType::OffBalanceSheetLease => 5,
441            FraudType::LeaseLiabilityUnderstatement => 4,
442            FraudType::RouAssetMisstatement => 3,
443            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
444            FraudType::FairValueHierarchyManipulation => 4,
445            FraudType::Level3InputManipulation => 5,
446            FraudType::ValuationTechniqueManipulation => 4,
447            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
448            FraudType::DelayedImpairment => 4,
449            FraudType::ImpairmentTestAvoidance => 4,
450            FraudType::CashFlowProjectionManipulation => 5,
451            FraudType::ImproperImpairmentReversal => 3,
452            _ => 4,
453        }
454    }
455}
456
457/// Error types for error detection.
458#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
459pub enum ErrorType {
460    // Data Entry Errors
461    /// Duplicate document entry.
462    DuplicateEntry,
463    /// Reversed debit/credit amounts.
464    ReversedAmount,
465    /// Transposed digits in amount.
466    TransposedDigits,
467    /// Wrong decimal placement.
468    DecimalError,
469    /// Missing required field.
470    MissingField,
471    /// Invalid account code.
472    InvalidAccount,
473
474    // Timing Errors
475    /// Posted to wrong period.
476    WrongPeriod,
477    /// Backdated entry.
478    BackdatedEntry,
479    /// Future-dated entry.
480    FutureDatedEntry,
481    /// Cutoff error.
482    CutoffError,
483
484    // Classification Errors
485    /// Wrong account classification.
486    MisclassifiedAccount,
487    /// Wrong cost center.
488    WrongCostCenter,
489    /// Wrong company code.
490    WrongCompanyCode,
491
492    // Calculation Errors
493    /// Unbalanced journal entry.
494    UnbalancedEntry,
495    /// Rounding error.
496    RoundingError,
497    /// Currency conversion error.
498    CurrencyError,
499    /// Tax calculation error.
500    TaxCalculationError,
501
502    // Accounting Standards Errors (Non-Fraudulent)
503    /// Wrong revenue recognition timing (honest mistake).
504    RevenueTimingError,
505    /// Performance obligation allocation error.
506    PoAllocationError,
507    /// Lease classification error (operating vs finance).
508    LeaseClassificationError,
509    /// Lease calculation error (PV, amortization).
510    LeaseCalculationError,
511    /// Fair value measurement error.
512    FairValueError,
513    /// Impairment calculation error.
514    ImpairmentCalculationError,
515    /// Discount rate error.
516    DiscountRateError,
517    /// Framework application error (IFRS vs GAAP).
518    FrameworkApplicationError,
519}
520
521impl ErrorType {
522    /// Returns severity level (1-5).
523    pub fn severity(&self) -> u8 {
524        match self {
525            ErrorType::RoundingError => 1,
526            ErrorType::MissingField => 2,
527            ErrorType::TransposedDigits => 2,
528            ErrorType::DecimalError => 3,
529            ErrorType::DuplicateEntry => 3,
530            ErrorType::ReversedAmount => 3,
531            ErrorType::WrongPeriod => 4,
532            ErrorType::UnbalancedEntry => 5,
533            ErrorType::CurrencyError => 4,
534            // Accounting Standards Errors
535            ErrorType::RevenueTimingError => 4,
536            ErrorType::PoAllocationError => 3,
537            ErrorType::LeaseClassificationError => 3,
538            ErrorType::LeaseCalculationError => 3,
539            ErrorType::FairValueError => 4,
540            ErrorType::ImpairmentCalculationError => 4,
541            ErrorType::DiscountRateError => 3,
542            ErrorType::FrameworkApplicationError => 4,
543            _ => 3,
544        }
545    }
546}
547
548/// Process issue types.
549#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
550pub enum ProcessIssueType {
551    // Approval Issues
552    /// Approval skipped entirely.
553    SkippedApproval,
554    /// Late approval (after posting).
555    LateApproval,
556    /// Missing supporting documentation.
557    MissingDocumentation,
558    /// Incomplete approval chain.
559    IncompleteApprovalChain,
560
561    // Timing Issues
562    /// Late posting.
563    LatePosting,
564    /// Posting outside business hours.
565    AfterHoursPosting,
566    /// Weekend/holiday posting.
567    WeekendPosting,
568    /// Rushed period-end posting.
569    RushedPeriodEnd,
570
571    // Control Issues
572    /// Manual override of system control.
573    ManualOverride,
574    /// Unusual user access pattern.
575    UnusualAccess,
576    /// System bypass.
577    SystemBypass,
578    /// Batch processing anomaly.
579    BatchAnomaly,
580
581    // Documentation Issues
582    /// Vague or missing description.
583    VagueDescription,
584    /// Changed after posting.
585    PostFactoChange,
586    /// Incomplete audit trail.
587    IncompleteAuditTrail,
588}
589
590impl ProcessIssueType {
591    /// Returns severity level (1-5).
592    pub fn severity(&self) -> u8 {
593        match self {
594            ProcessIssueType::VagueDescription => 1,
595            ProcessIssueType::LatePosting => 2,
596            ProcessIssueType::AfterHoursPosting => 2,
597            ProcessIssueType::WeekendPosting => 2,
598            ProcessIssueType::SkippedApproval => 4,
599            ProcessIssueType::ManualOverride => 4,
600            ProcessIssueType::SystemBypass => 5,
601            ProcessIssueType::IncompleteAuditTrail => 4,
602            _ => 3,
603        }
604    }
605}
606
607/// Statistical anomaly types.
608#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
609pub enum StatisticalAnomalyType {
610    // Amount Anomalies
611    /// Amount significantly above normal.
612    UnusuallyHighAmount,
613    /// Amount significantly below normal.
614    UnusuallyLowAmount,
615    /// Violates Benford's Law distribution.
616    BenfordViolation,
617    /// Exact duplicate amount (suspicious).
618    ExactDuplicateAmount,
619    /// Repeating pattern in amounts.
620    RepeatingAmount,
621
622    // Frequency Anomalies
623    /// Unusual transaction frequency.
624    UnusualFrequency,
625    /// Burst of transactions.
626    TransactionBurst,
627    /// Unusual time of day.
628    UnusualTiming,
629
630    // Trend Anomalies
631    /// Break in historical trend.
632    TrendBreak,
633    /// Sudden level shift.
634    LevelShift,
635    /// Seasonal pattern violation.
636    SeasonalAnomaly,
637
638    // Distribution Anomalies
639    /// Outlier in distribution.
640    StatisticalOutlier,
641    /// Change in variance.
642    VarianceChange,
643    /// Distribution shift.
644    DistributionShift,
645}
646
647impl StatisticalAnomalyType {
648    /// Returns severity level (1-5).
649    pub fn severity(&self) -> u8 {
650        match self {
651            StatisticalAnomalyType::UnusualTiming => 1,
652            StatisticalAnomalyType::UnusualFrequency => 2,
653            StatisticalAnomalyType::BenfordViolation => 2,
654            StatisticalAnomalyType::UnusuallyHighAmount => 3,
655            StatisticalAnomalyType::TrendBreak => 3,
656            StatisticalAnomalyType::TransactionBurst => 4,
657            StatisticalAnomalyType::ExactDuplicateAmount => 3,
658            _ => 3,
659        }
660    }
661}
662
663/// Relational/graph anomaly types.
664#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
665pub enum RelationalAnomalyType {
666    // Transaction Pattern Anomalies
667    /// Circular transaction pattern.
668    CircularTransaction,
669    /// Unusual account combination.
670    UnusualAccountPair,
671    /// New trading partner.
672    NewCounterparty,
673    /// Dormant account suddenly active.
674    DormantAccountActivity,
675
676    // Network Anomalies
677    /// Unusual network centrality.
678    CentralityAnomaly,
679    /// Isolated transaction cluster.
680    IsolatedCluster,
681    /// Bridge node anomaly.
682    BridgeNodeAnomaly,
683    /// Community structure change.
684    CommunityAnomaly,
685
686    // Relationship Anomalies
687    /// Missing expected relationship.
688    MissingRelationship,
689    /// Unexpected relationship.
690    UnexpectedRelationship,
691    /// Relationship strength change.
692    RelationshipStrengthChange,
693
694    // Intercompany Anomalies
695    /// Unmatched intercompany transaction.
696    UnmatchedIntercompany,
697    /// Circular intercompany flow.
698    CircularIntercompany,
699    /// Transfer pricing anomaly.
700    TransferPricingAnomaly,
701}
702
703impl RelationalAnomalyType {
704    /// Returns severity level (1-5).
705    pub fn severity(&self) -> u8 {
706        match self {
707            RelationalAnomalyType::NewCounterparty => 1,
708            RelationalAnomalyType::DormantAccountActivity => 2,
709            RelationalAnomalyType::UnusualAccountPair => 2,
710            RelationalAnomalyType::CircularTransaction => 4,
711            RelationalAnomalyType::CircularIntercompany => 4,
712            RelationalAnomalyType::TransferPricingAnomaly => 4,
713            RelationalAnomalyType::UnmatchedIntercompany => 3,
714            _ => 3,
715        }
716    }
717}
718
719/// A labeled anomaly for supervised learning.
720#[derive(Debug, Clone, Serialize, Deserialize)]
721pub struct LabeledAnomaly {
722    /// Unique anomaly identifier.
723    pub anomaly_id: String,
724    /// Type of anomaly.
725    pub anomaly_type: AnomalyType,
726    /// Document or entity that contains the anomaly.
727    pub document_id: String,
728    /// Document type (JE, PO, Invoice, etc.).
729    pub document_type: String,
730    /// Company code.
731    pub company_code: String,
732    /// Date the anomaly occurred.
733    pub anomaly_date: NaiveDate,
734    /// Timestamp when detected/injected.
735    pub detection_timestamp: NaiveDateTime,
736    /// Confidence score (0.0 - 1.0) for injected anomalies.
737    pub confidence: f64,
738    /// Severity (1-5).
739    pub severity: u8,
740    /// Description of the anomaly.
741    pub description: String,
742    /// Related entities (user IDs, account codes, etc.).
743    pub related_entities: Vec<String>,
744    /// Monetary impact if applicable.
745    pub monetary_impact: Option<Decimal>,
746    /// Additional metadata.
747    pub metadata: HashMap<String, String>,
748    /// Whether this was injected (true) or naturally occurring (false).
749    pub is_injected: bool,
750    /// Injection strategy used (if injected) - legacy string field.
751    pub injection_strategy: Option<String>,
752    /// Cluster ID if part of an anomaly cluster.
753    pub cluster_id: Option<String>,
754
755    // ========================================
756    // PROVENANCE TRACKING FIELDS (Phase 1.2)
757    // ========================================
758    /// Hash of the original document before modification.
759    /// Enables tracking what the document looked like pre-injection.
760    #[serde(default, skip_serializing_if = "Option::is_none")]
761    pub original_document_hash: Option<String>,
762
763    /// Causal reason explaining why this anomaly was injected.
764    /// Provides "why" tracking for each anomaly.
765    #[serde(default, skip_serializing_if = "Option::is_none")]
766    pub causal_reason: Option<AnomalyCausalReason>,
767
768    /// Structured injection strategy with parameters.
769    /// More detailed than the legacy string-based injection_strategy field.
770    #[serde(default, skip_serializing_if = "Option::is_none")]
771    pub structured_strategy: Option<InjectionStrategy>,
772
773    /// Parent anomaly ID if this was derived from another anomaly.
774    /// Enables anomaly transformation chains.
775    #[serde(default, skip_serializing_if = "Option::is_none")]
776    pub parent_anomaly_id: Option<String>,
777
778    /// Child anomaly IDs that were derived from this anomaly.
779    #[serde(default, skip_serializing_if = "Vec::is_empty")]
780    pub child_anomaly_ids: Vec<String>,
781
782    /// Scenario ID if this anomaly is part of a multi-step scenario.
783    #[serde(default, skip_serializing_if = "Option::is_none")]
784    pub scenario_id: Option<String>,
785
786    /// Generation run ID that produced this anomaly.
787    /// Enables tracing anomalies back to their generation run.
788    #[serde(default, skip_serializing_if = "Option::is_none")]
789    pub run_id: Option<String>,
790
791    /// Seed used for RNG during generation.
792    /// Enables reproducibility.
793    #[serde(default, skip_serializing_if = "Option::is_none")]
794    pub generation_seed: Option<u64>,
795}
796
797impl LabeledAnomaly {
798    /// Creates a new labeled anomaly.
799    pub fn new(
800        anomaly_id: String,
801        anomaly_type: AnomalyType,
802        document_id: String,
803        document_type: String,
804        company_code: String,
805        anomaly_date: NaiveDate,
806    ) -> Self {
807        let severity = anomaly_type.severity();
808        let description = format!(
809            "{} - {} in document {}",
810            anomaly_type.category(),
811            anomaly_type.type_name(),
812            document_id
813        );
814
815        Self {
816            anomaly_id,
817            anomaly_type,
818            document_id,
819            document_type,
820            company_code,
821            anomaly_date,
822            detection_timestamp: chrono::Local::now().naive_local(),
823            confidence: 1.0,
824            severity,
825            description,
826            related_entities: Vec::new(),
827            monetary_impact: None,
828            metadata: HashMap::new(),
829            is_injected: true,
830            injection_strategy: None,
831            cluster_id: None,
832            // Provenance fields
833            original_document_hash: None,
834            causal_reason: None,
835            structured_strategy: None,
836            parent_anomaly_id: None,
837            child_anomaly_ids: Vec::new(),
838            scenario_id: None,
839            run_id: None,
840            generation_seed: None,
841        }
842    }
843
844    /// Sets the description.
845    pub fn with_description(mut self, description: &str) -> Self {
846        self.description = description.to_string();
847        self
848    }
849
850    /// Sets the monetary impact.
851    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
852        self.monetary_impact = Some(impact);
853        self
854    }
855
856    /// Adds a related entity.
857    pub fn with_related_entity(mut self, entity: &str) -> Self {
858        self.related_entities.push(entity.to_string());
859        self
860    }
861
862    /// Adds metadata.
863    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
864        self.metadata.insert(key.to_string(), value.to_string());
865        self
866    }
867
868    /// Sets the injection strategy (legacy string).
869    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
870        self.injection_strategy = Some(strategy.to_string());
871        self
872    }
873
874    /// Sets the cluster ID.
875    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
876        self.cluster_id = Some(cluster_id.to_string());
877        self
878    }
879
880    // ========================================
881    // PROVENANCE BUILDER METHODS (Phase 1.2)
882    // ========================================
883
884    /// Sets the original document hash for provenance tracking.
885    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
886        self.original_document_hash = Some(hash.to_string());
887        self
888    }
889
890    /// Sets the causal reason for this anomaly.
891    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
892        self.causal_reason = Some(reason);
893        self
894    }
895
896    /// Sets the structured injection strategy.
897    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
898        // Also set the legacy string field for backward compatibility
899        self.injection_strategy = Some(strategy.strategy_type().to_string());
900        self.structured_strategy = Some(strategy);
901        self
902    }
903
904    /// Sets the parent anomaly ID (for anomaly derivation chains).
905    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
906        self.parent_anomaly_id = Some(parent_id.to_string());
907        self
908    }
909
910    /// Adds a child anomaly ID.
911    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
912        self.child_anomaly_ids.push(child_id.to_string());
913        self
914    }
915
916    /// Sets the scenario ID for multi-step scenario tracking.
917    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
918        self.scenario_id = Some(scenario_id.to_string());
919        self
920    }
921
922    /// Sets the generation run ID.
923    pub fn with_run_id(mut self, run_id: &str) -> Self {
924        self.run_id = Some(run_id.to_string());
925        self
926    }
927
928    /// Sets the generation seed for reproducibility.
929    pub fn with_generation_seed(mut self, seed: u64) -> Self {
930        self.generation_seed = Some(seed);
931        self
932    }
933
934    /// Sets multiple provenance fields at once for convenience.
935    pub fn with_provenance(
936        mut self,
937        run_id: Option<&str>,
938        seed: Option<u64>,
939        causal_reason: Option<AnomalyCausalReason>,
940    ) -> Self {
941        if let Some(id) = run_id {
942            self.run_id = Some(id.to_string());
943        }
944        self.generation_seed = seed;
945        self.causal_reason = causal_reason;
946        self
947    }
948
949    /// Converts to a feature vector for ML.
950    ///
951    /// Returns a vector of 15 features:
952    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
953    /// - 1 feature: Severity (normalized 0-1)
954    /// - 1 feature: Confidence
955    /// - 1 feature: Has monetary impact (0/1)
956    /// - 1 feature: Monetary impact (log-scaled)
957    /// - 1 feature: Is intentional (0/1)
958    /// - 1 feature: Number of related entities
959    /// - 1 feature: Is part of cluster (0/1)
960    /// - 1 feature: Is part of scenario (0/1)
961    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
962    pub fn to_features(&self) -> Vec<f64> {
963        let mut features = Vec::new();
964
965        // Category one-hot encoding
966        let categories = [
967            "Fraud",
968            "Error",
969            "ProcessIssue",
970            "Statistical",
971            "Relational",
972            "Custom",
973        ];
974        for cat in &categories {
975            features.push(if self.anomaly_type.category() == *cat {
976                1.0
977            } else {
978                0.0
979            });
980        }
981
982        // Severity (normalized)
983        features.push(self.severity as f64 / 5.0);
984
985        // Confidence
986        features.push(self.confidence);
987
988        // Has monetary impact
989        features.push(if self.monetary_impact.is_some() {
990            1.0
991        } else {
992            0.0
993        });
994
995        // Monetary impact (log-scaled)
996        if let Some(impact) = self.monetary_impact {
997            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
998            features.push((impact_f64.abs() + 1.0).ln());
999        } else {
1000            features.push(0.0);
1001        }
1002
1003        // Is intentional
1004        features.push(if self.anomaly_type.is_intentional() {
1005            1.0
1006        } else {
1007            0.0
1008        });
1009
1010        // Number of related entities
1011        features.push(self.related_entities.len() as f64);
1012
1013        // Is part of cluster
1014        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1015
1016        // Provenance features
1017        // Is part of scenario
1018        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1019
1020        // Has parent anomaly (indicates this is a derived anomaly)
1021        features.push(if self.parent_anomaly_id.is_some() {
1022            1.0
1023        } else {
1024            0.0
1025        });
1026
1027        features
1028    }
1029
1030    /// Returns the number of features in the feature vector.
1031    pub fn feature_count() -> usize {
1032        15 // 6 category + 9 other features
1033    }
1034
1035    /// Returns feature names for documentation/ML metadata.
1036    pub fn feature_names() -> Vec<&'static str> {
1037        vec![
1038            "category_fraud",
1039            "category_error",
1040            "category_process_issue",
1041            "category_statistical",
1042            "category_relational",
1043            "category_custom",
1044            "severity_normalized",
1045            "confidence",
1046            "has_monetary_impact",
1047            "monetary_impact_log",
1048            "is_intentional",
1049            "related_entity_count",
1050            "is_clustered",
1051            "is_scenario_part",
1052            "is_derived",
1053        ]
1054    }
1055}
1056
/// Summary of anomalies for reporting.
///
/// Aggregates counts, monetary impact, date span and cluster count over a
/// collection of labeled anomalies. Built by `AnomalySummary::from_anomalies`;
/// the derived `Default` yields an empty summary.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Total anomaly count.
    pub total_count: usize,
    /// Count by category, keyed by the category name of each anomaly's type.
    pub by_category: HashMap<String, usize>,
    /// Count by specific type, keyed by the type name of each anomaly's type.
    pub by_type: HashMap<String, usize>,
    /// Count by severity, keyed by the anomaly's severity value.
    pub by_severity: HashMap<u8, usize>,
    /// Count by company, keyed by company code.
    pub by_company: HashMap<String, usize>,
    /// Total monetary impact: the sum over anomalies that carry an impact
    /// (anomalies without one contribute nothing).
    pub total_monetary_impact: Decimal,
    /// Date range as `(earliest, latest)` anomaly date; `None` when the
    /// summary covers no anomalies.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster IDs seen among the anomalies.
    pub cluster_count: usize,
}
1077
1078impl AnomalySummary {
1079    /// Creates a summary from a list of anomalies.
1080    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1081        let mut summary = AnomalySummary {
1082            total_count: anomalies.len(),
1083            ..Default::default()
1084        };
1085
1086        let mut min_date: Option<NaiveDate> = None;
1087        let mut max_date: Option<NaiveDate> = None;
1088        let mut clusters = std::collections::HashSet::new();
1089
1090        for anomaly in anomalies {
1091            // By category
1092            *summary
1093                .by_category
1094                .entry(anomaly.anomaly_type.category().to_string())
1095                .or_insert(0) += 1;
1096
1097            // By type
1098            *summary
1099                .by_type
1100                .entry(anomaly.anomaly_type.type_name())
1101                .or_insert(0) += 1;
1102
1103            // By severity
1104            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1105
1106            // By company
1107            *summary
1108                .by_company
1109                .entry(anomaly.company_code.clone())
1110                .or_insert(0) += 1;
1111
1112            // Monetary impact
1113            if let Some(impact) = anomaly.monetary_impact {
1114                summary.total_monetary_impact += impact;
1115            }
1116
1117            // Date range
1118            match min_date {
1119                None => min_date = Some(anomaly.anomaly_date),
1120                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1121                _ => {}
1122            }
1123            match max_date {
1124                None => max_date = Some(anomaly.anomaly_date),
1125                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1126                _ => {}
1127            }
1128
1129            // Clusters
1130            if let Some(cluster_id) = &anomaly.cluster_id {
1131                clusters.insert(cluster_id.clone());
1132            }
1133        }
1134
1135        summary.date_range = min_date.zip(max_date);
1136        summary.cluster_count = clusters.len();
1137
1138        summary
1139    }
1140}
1141
1142// ============================================================================
1143// ENHANCED ANOMALY TAXONOMY (FR-003)
1144// ============================================================================
1145
/// High-level anomaly category for multi-class classification.
///
/// These categories provide a more granular classification than the base
/// AnomalyType enum, enabling better ML model training and audit reporting.
///
/// The variant order below matches the numeric codes assigned by
/// `AnomalyCategory::ordinal` (0 for `FictitiousVendor` through 14 for
/// `Custom`); keep the two in sync when adding variants.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related anomalies
    /// Fictitious or shell vendor.
    FictitiousVendor,
    /// Kickback or collusion with vendor.
    VendorKickback,
    /// Related party vendor transactions.
    RelatedPartyVendor,

    // Transaction-related anomalies
    /// Duplicate payment or invoice.
    DuplicatePayment,
    /// Unauthorized transaction.
    UnauthorizedTransaction,
    /// Structured transactions to avoid thresholds.
    StructuredTransaction,

    // Pattern-based anomalies
    /// Circular flow of funds.
    CircularFlow,
    /// Behavioral anomaly (deviation from normal patterns).
    BehavioralAnomaly,
    /// Timing-based anomaly.
    TimingAnomaly,

    // Journal entry anomalies
    /// Manual journal entry anomaly.
    JournalAnomaly,
    /// Manual override of controls.
    ManualOverride,
    /// Missing approval in chain.
    MissingApproval,

    // Statistical anomalies
    /// Statistical outlier.
    StatisticalOutlier,
    /// Distribution anomaly (Benford, etc.).
    DistributionAnomaly,

    // Custom category
    /// User-defined category; the payload is the category's name and is
    /// returned unchanged by `AnomalyCategory::name`.
    Custom(String),
}
1194
1195impl AnomalyCategory {
1196    /// Derives an AnomalyCategory from an AnomalyType.
1197    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1198        match anomaly_type {
1199            AnomalyType::Fraud(fraud_type) => match fraud_type {
1200                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1201                    AnomalyCategory::FictitiousVendor
1202                }
1203                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1204                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1205                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1206                    AnomalyCategory::StructuredTransaction
1207                }
1208                FraudType::SelfApproval
1209                | FraudType::UnauthorizedApproval
1210                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1211                FraudType::TimingAnomaly
1212                | FraudType::RoundDollarManipulation
1213                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1214                _ => AnomalyCategory::BehavioralAnomaly,
1215            },
1216            AnomalyType::Error(error_type) => match error_type {
1217                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1218                ErrorType::WrongPeriod
1219                | ErrorType::BackdatedEntry
1220                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1221                _ => AnomalyCategory::JournalAnomaly,
1222            },
1223            AnomalyType::ProcessIssue(process_type) => match process_type {
1224                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1225                    AnomalyCategory::MissingApproval
1226                }
1227                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1228                    AnomalyCategory::ManualOverride
1229                }
1230                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1231                    AnomalyCategory::TimingAnomaly
1232                }
1233                _ => AnomalyCategory::BehavioralAnomaly,
1234            },
1235            AnomalyType::Statistical(stat_type) => match stat_type {
1236                StatisticalAnomalyType::BenfordViolation
1237                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1238                _ => AnomalyCategory::StatisticalOutlier,
1239            },
1240            AnomalyType::Relational(rel_type) => match rel_type {
1241                RelationalAnomalyType::CircularTransaction
1242                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1243                _ => AnomalyCategory::BehavioralAnomaly,
1244            },
1245            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1246        }
1247    }
1248
1249    /// Returns the category name as a string.
1250    pub fn name(&self) -> &str {
1251        match self {
1252            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1253            AnomalyCategory::VendorKickback => "vendor_kickback",
1254            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1255            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1256            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1257            AnomalyCategory::StructuredTransaction => "structured_transaction",
1258            AnomalyCategory::CircularFlow => "circular_flow",
1259            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1260            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1261            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1262            AnomalyCategory::ManualOverride => "manual_override",
1263            AnomalyCategory::MissingApproval => "missing_approval",
1264            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1265            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1266            AnomalyCategory::Custom(s) => s.as_str(),
1267        }
1268    }
1269
1270    /// Returns the ordinal value for ML encoding.
1271    pub fn ordinal(&self) -> u8 {
1272        match self {
1273            AnomalyCategory::FictitiousVendor => 0,
1274            AnomalyCategory::VendorKickback => 1,
1275            AnomalyCategory::RelatedPartyVendor => 2,
1276            AnomalyCategory::DuplicatePayment => 3,
1277            AnomalyCategory::UnauthorizedTransaction => 4,
1278            AnomalyCategory::StructuredTransaction => 5,
1279            AnomalyCategory::CircularFlow => 6,
1280            AnomalyCategory::BehavioralAnomaly => 7,
1281            AnomalyCategory::TimingAnomaly => 8,
1282            AnomalyCategory::JournalAnomaly => 9,
1283            AnomalyCategory::ManualOverride => 10,
1284            AnomalyCategory::MissingApproval => 11,
1285            AnomalyCategory::StatisticalOutlier => 12,
1286            AnomalyCategory::DistributionAnomaly => 13,
1287            AnomalyCategory::Custom(_) => 14,
1288        }
1289    }
1290
1291    /// Returns the total number of categories (excluding Custom).
1292    pub fn category_count() -> usize {
1293        15 // 14 fixed categories + Custom
1294    }
1295}
1296
/// Type of contributing factor for anomaly confidence/severity calculation.
///
/// All variants are plain tags without payload, so the type is `Copy`.
/// `FactorType::name` gives the snake_case identifier of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Amount deviation from expected value.
    AmountDeviation,
    /// Proximity to approval/reporting threshold.
    ThresholdProximity,
    /// Timing-related anomaly indicator.
    TimingAnomaly,
    /// Entity risk score contribution.
    EntityRisk,
    /// Pattern match confidence.
    PatternMatch,
    /// Frequency deviation from normal.
    FrequencyDeviation,
    /// Relationship-based anomaly indicator.
    RelationshipAnomaly,
    /// Control bypass indicator.
    ControlBypass,
    /// Benford's Law violation.
    BenfordViolation,
    /// Duplicate indicator.
    DuplicateIndicator,
    /// Approval chain issue.
    ApprovalChainIssue,
    /// Documentation gap.
    DocumentationGap,
    /// Custom factor type (carries no label of its own).
    Custom,
}
1327
1328impl FactorType {
1329    /// Returns the factor type name.
1330    pub fn name(&self) -> &'static str {
1331        match self {
1332            FactorType::AmountDeviation => "amount_deviation",
1333            FactorType::ThresholdProximity => "threshold_proximity",
1334            FactorType::TimingAnomaly => "timing_anomaly",
1335            FactorType::EntityRisk => "entity_risk",
1336            FactorType::PatternMatch => "pattern_match",
1337            FactorType::FrequencyDeviation => "frequency_deviation",
1338            FactorType::RelationshipAnomaly => "relationship_anomaly",
1339            FactorType::ControlBypass => "control_bypass",
1340            FactorType::BenfordViolation => "benford_violation",
1341            FactorType::DuplicateIndicator => "duplicate_indicator",
1342            FactorType::ApprovalChainIssue => "approval_chain_issue",
1343            FactorType::DocumentationGap => "documentation_gap",
1344            FactorType::Custom => "custom",
1345        }
1346    }
1347}
1348
/// Evidence supporting a contributing factor.
///
/// Attached to a `ContributingFactor` through `ContributingFactor::with_evidence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
    pub source: String,
    /// Raw evidence data as free-form string key/value pairs.
    pub data: HashMap<String, String>,
}
1357
/// A contributing factor to anomaly confidence/severity.
///
/// Pairs an observed `value` with a `threshold`; `ContributingFactor::contribution`
/// turns the gap between them into a weighted score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Type of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold or expected value.
    pub threshold: f64,
    /// Direction of comparison: `true` means a value above the threshold is
    /// anomalous, `false` means a value below the threshold is anomalous.
    pub direction_greater: bool,
    /// Weight of this factor in overall calculation (0.0 - 1.0).
    /// The range is a convention; the constructor stores the value as given.
    pub weight: f64,
    /// Human-readable description.
    pub description: String,
    /// Optional supporting evidence (`None` until `with_evidence` is called).
    pub evidence: Option<FactorEvidence>,
}
1376
1377impl ContributingFactor {
1378    /// Creates a new contributing factor.
1379    pub fn new(
1380        factor_type: FactorType,
1381        value: f64,
1382        threshold: f64,
1383        direction_greater: bool,
1384        weight: f64,
1385        description: &str,
1386    ) -> Self {
1387        Self {
1388            factor_type,
1389            value,
1390            threshold,
1391            direction_greater,
1392            weight,
1393            description: description.to_string(),
1394            evidence: None,
1395        }
1396    }
1397
1398    /// Adds evidence to the factor.
1399    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1400        self.evidence = Some(FactorEvidence {
1401            source: source.to_string(),
1402            data,
1403        });
1404        self
1405    }
1406
1407    /// Calculates the factor's contribution to anomaly score.
1408    pub fn contribution(&self) -> f64 {
1409        let deviation = if self.direction_greater {
1410            (self.value - self.threshold).max(0.0)
1411        } else {
1412            (self.threshold - self.value).max(0.0)
1413        };
1414
1415        // Normalize by threshold to get relative deviation
1416        let relative_deviation = if self.threshold.abs() > 0.001 {
1417            deviation / self.threshold.abs()
1418        } else {
1419            deviation
1420        };
1421
1422        // Apply weight and cap at 1.0
1423        (relative_deviation * self.weight).min(1.0)
1424    }
1425}
1426
/// Enhanced anomaly label with dynamic confidence and severity.
///
/// Wraps a base `LabeledAnomaly` and adds a finer-grained category,
/// adjustable confidence/severity scores and the factors behind them.
/// Created with `EnhancedAnomalyLabel::from_base`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// Base labeled anomaly (backward compatible).
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Dynamically calculated confidence (0.0 - 1.0).
    /// `with_confidence` clamps its argument into this range.
    pub enhanced_confidence: f64,
    /// Contextually calculated severity (0.0 - 1.0).
    /// `with_severity` clamps its argument into this range.
    pub enhanced_severity: f64,
    /// Factors contributing to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Secondary categories (for multi-label classification).
    /// `with_secondary_category` keeps this free of duplicates and of the primary category.
    pub secondary_categories: Vec<AnomalyCategory>,
}
1443
1444impl EnhancedAnomalyLabel {
1445    /// Creates an enhanced label from a base labeled anomaly.
1446    pub fn from_base(base: LabeledAnomaly) -> Self {
1447        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1448        let enhanced_confidence = base.confidence;
1449        let enhanced_severity = base.severity as f64 / 5.0;
1450
1451        Self {
1452            base,
1453            category,
1454            enhanced_confidence,
1455            enhanced_severity,
1456            contributing_factors: Vec::new(),
1457            secondary_categories: Vec::new(),
1458        }
1459    }
1460
1461    /// Sets the enhanced confidence.
1462    pub fn with_confidence(mut self, confidence: f64) -> Self {
1463        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1464        self
1465    }
1466
1467    /// Sets the enhanced severity.
1468    pub fn with_severity(mut self, severity: f64) -> Self {
1469        self.enhanced_severity = severity.clamp(0.0, 1.0);
1470        self
1471    }
1472
1473    /// Adds a contributing factor.
1474    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1475        self.contributing_factors.push(factor);
1476        self
1477    }
1478
1479    /// Adds a secondary category.
1480    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1481        if !self.secondary_categories.contains(&category) && category != self.category {
1482            self.secondary_categories.push(category);
1483        }
1484        self
1485    }
1486
1487    /// Converts to an extended feature vector.
1488    ///
1489    /// Returns base features (15) + enhanced features (10) = 25 features.
1490    pub fn to_features(&self) -> Vec<f64> {
1491        let mut features = self.base.to_features();
1492
1493        // Enhanced features
1494        features.push(self.enhanced_confidence);
1495        features.push(self.enhanced_severity);
1496        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1497        features.push(self.secondary_categories.len() as f64);
1498        features.push(self.contributing_factors.len() as f64);
1499
1500        // Max factor weight
1501        let max_weight = self
1502            .contributing_factors
1503            .iter()
1504            .map(|f| f.weight)
1505            .fold(0.0, f64::max);
1506        features.push(max_weight);
1507
1508        // Factor type indicators (binary flags for key factor types)
1509        let has_control_bypass = self
1510            .contributing_factors
1511            .iter()
1512            .any(|f| f.factor_type == FactorType::ControlBypass);
1513        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1514
1515        let has_amount_deviation = self
1516            .contributing_factors
1517            .iter()
1518            .any(|f| f.factor_type == FactorType::AmountDeviation);
1519        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1520
1521        let has_timing = self
1522            .contributing_factors
1523            .iter()
1524            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1525        features.push(if has_timing { 1.0 } else { 0.0 });
1526
1527        let has_pattern_match = self
1528            .contributing_factors
1529            .iter()
1530            .any(|f| f.factor_type == FactorType::PatternMatch);
1531        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1532
1533        features
1534    }
1535
1536    /// Returns the number of features in the enhanced feature vector.
1537    pub fn feature_count() -> usize {
1538        25 // 15 base + 10 enhanced
1539    }
1540
1541    /// Returns feature names for the enhanced feature vector.
1542    pub fn feature_names() -> Vec<&'static str> {
1543        let mut names = LabeledAnomaly::feature_names();
1544        names.extend(vec![
1545            "enhanced_confidence",
1546            "enhanced_severity",
1547            "category_ordinal",
1548            "secondary_category_count",
1549            "contributing_factor_count",
1550            "max_factor_weight",
1551            "has_control_bypass",
1552            "has_amount_deviation",
1553            "has_timing_factor",
1554            "has_pattern_match",
1555        ]);
1556        names
1557    }
1558}
1559
/// Configuration for anomaly rates.
///
/// `total_rate` is the overall share of anomalous records; the five category
/// rates describe how anomalies are split between categories and are expected
/// to sum to 1.0 (`AnomalyRateConfig::validate` allows a tolerance of 0.01).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate (0.0 - 1.0).
    pub total_rate: f64,
    /// Fraud rate as proportion of anomalies.
    pub fraud_rate: f64,
    /// Error rate as proportion of anomalies.
    pub error_rate: f64,
    /// Process issue rate as proportion of anomalies.
    pub process_issue_rate: f64,
    /// Statistical anomaly rate as proportion of anomalies.
    pub statistical_rate: f64,
    /// Relational anomaly rate as proportion of anomalies.
    pub relational_rate: f64,
}
1576
1577impl Default for AnomalyRateConfig {
1578    fn default() -> Self {
1579        Self {
1580            total_rate: 0.02,         // 2% of transactions are anomalous
1581            fraud_rate: 0.25,         // 25% of anomalies are fraud
1582            error_rate: 0.35,         // 35% of anomalies are errors
1583            process_issue_rate: 0.20, // 20% are process issues
1584            statistical_rate: 0.15,   // 15% are statistical
1585            relational_rate: 0.05,    // 5% are relational
1586        }
1587    }
1588}
1589
1590impl AnomalyRateConfig {
1591    /// Validates that rates sum to approximately 1.0.
1592    pub fn validate(&self) -> Result<(), String> {
1593        let sum = self.fraud_rate
1594            + self.error_rate
1595            + self.process_issue_rate
1596            + self.statistical_rate
1597            + self.relational_rate;
1598
1599        if (sum - 1.0).abs() > 0.01 {
1600            return Err(format!(
1601                "Anomaly category rates must sum to 1.0, got {}",
1602                sum
1603            ));
1604        }
1605
1606        if self.total_rate < 0.0 || self.total_rate > 1.0 {
1607            return Err(format!(
1608                "Total rate must be between 0.0 and 1.0, got {}",
1609                self.total_rate
1610            ));
1611        }
1612
1613        Ok(())
1614    }
1615}
1616
1617#[cfg(test)]
1618mod tests {
1619    use super::*;
1620    use rust_decimal_macros::dec;
1621
1622    #[test]
1623    fn test_anomaly_type_category() {
1624        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
1625        assert_eq!(fraud.category(), "Fraud");
1626        assert!(fraud.is_intentional());
1627
1628        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
1629        assert_eq!(error.category(), "Error");
1630        assert!(!error.is_intentional());
1631    }
1632
1633    #[test]
1634    fn test_labeled_anomaly() {
1635        let anomaly = LabeledAnomaly::new(
1636            "ANO001".to_string(),
1637            AnomalyType::Fraud(FraudType::SelfApproval),
1638            "JE001".to_string(),
1639            "JE".to_string(),
1640            "1000".to_string(),
1641            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1642        )
1643        .with_description("User approved their own expense report")
1644        .with_related_entity("USER001");
1645
1646        assert_eq!(anomaly.severity, 3);
1647        assert!(anomaly.is_injected);
1648        assert_eq!(anomaly.related_entities.len(), 1);
1649    }
1650
1651    #[test]
1652    fn test_labeled_anomaly_with_provenance() {
1653        let anomaly = LabeledAnomaly::new(
1654            "ANO001".to_string(),
1655            AnomalyType::Fraud(FraudType::SelfApproval),
1656            "JE001".to_string(),
1657            "JE".to_string(),
1658            "1000".to_string(),
1659            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1660        )
1661        .with_run_id("run-123")
1662        .with_generation_seed(42)
1663        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
1664        .with_structured_strategy(InjectionStrategy::SelfApproval {
1665            user_id: "USER001".to_string(),
1666        })
1667        .with_scenario("scenario-001")
1668        .with_original_document_hash("abc123");
1669
1670        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
1671        assert_eq!(anomaly.generation_seed, Some(42));
1672        assert!(anomaly.causal_reason.is_some());
1673        assert!(anomaly.structured_strategy.is_some());
1674        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
1675        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
1676
1677        // Check that legacy injection_strategy is also set
1678        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
1679    }
1680
1681    #[test]
1682    fn test_labeled_anomaly_derivation_chain() {
1683        let parent = LabeledAnomaly::new(
1684            "ANO001".to_string(),
1685            AnomalyType::Fraud(FraudType::DuplicatePayment),
1686            "JE001".to_string(),
1687            "JE".to_string(),
1688            "1000".to_string(),
1689            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1690        );
1691
1692        let child = LabeledAnomaly::new(
1693            "ANO002".to_string(),
1694            AnomalyType::Error(ErrorType::DuplicateEntry),
1695            "JE002".to_string(),
1696            "JE".to_string(),
1697            "1000".to_string(),
1698            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1699        )
1700        .with_parent_anomaly(&parent.anomaly_id);
1701
1702        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
1703    }
1704
1705    #[test]
1706    fn test_injection_strategy_description() {
1707        let strategy = InjectionStrategy::AmountManipulation {
1708            original: dec!(1000),
1709            factor: 2.5,
1710        };
1711        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
1712        assert_eq!(strategy.strategy_type(), "AmountManipulation");
1713
1714        let strategy = InjectionStrategy::ThresholdAvoidance {
1715            threshold: dec!(10000),
1716            adjusted_amount: dec!(9999),
1717        };
1718        assert_eq!(
1719            strategy.description(),
1720            "Amount adjusted to avoid 10000 threshold"
1721        );
1722
1723        let strategy = InjectionStrategy::DateShift {
1724            days_shifted: -5,
1725            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1726        };
1727        assert_eq!(strategy.description(), "Date backdated by 5 days");
1728
1729        let strategy = InjectionStrategy::DateShift {
1730            days_shifted: 3,
1731            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1732        };
1733        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
1734    }
1735
1736    #[test]
1737    fn test_causal_reason_variants() {
1738        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
1739        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
1740            assert!((base_rate - 0.02).abs() < 0.001);
1741        }
1742
1743        let reason = AnomalyCausalReason::TemporalPattern {
1744            pattern_name: "year_end_spike".to_string(),
1745        };
1746        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
1747            assert_eq!(pattern_name, "year_end_spike");
1748        }
1749
1750        let reason = AnomalyCausalReason::ScenarioStep {
1751            scenario_type: "kickback".to_string(),
1752            step_number: 3,
1753        };
1754        if let AnomalyCausalReason::ScenarioStep {
1755            scenario_type,
1756            step_number,
1757        } = reason
1758        {
1759            assert_eq!(scenario_type, "kickback");
1760            assert_eq!(step_number, 3);
1761        }
1762    }
1763
1764    #[test]
1765    fn test_feature_vector_length() {
1766        let anomaly = LabeledAnomaly::new(
1767            "ANO001".to_string(),
1768            AnomalyType::Fraud(FraudType::SelfApproval),
1769            "JE001".to_string(),
1770            "JE".to_string(),
1771            "1000".to_string(),
1772            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1773        );
1774
1775        let features = anomaly.to_features();
1776        assert_eq!(features.len(), LabeledAnomaly::feature_count());
1777        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
1778    }
1779
1780    #[test]
1781    fn test_feature_vector_with_provenance() {
1782        let anomaly = LabeledAnomaly::new(
1783            "ANO001".to_string(),
1784            AnomalyType::Fraud(FraudType::SelfApproval),
1785            "JE001".to_string(),
1786            "JE".to_string(),
1787            "1000".to_string(),
1788            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1789        )
1790        .with_scenario("scenario-001")
1791        .with_parent_anomaly("ANO000");
1792
1793        let features = anomaly.to_features();
1794
1795        // Last two features should be 1.0 (has scenario, has parent)
1796        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
1797        assert_eq!(features[features.len() - 1], 1.0); // is_derived
1798    }
1799
1800    #[test]
1801    fn test_anomaly_summary() {
1802        let anomalies = vec![
1803            LabeledAnomaly::new(
1804                "ANO001".to_string(),
1805                AnomalyType::Fraud(FraudType::SelfApproval),
1806                "JE001".to_string(),
1807                "JE".to_string(),
1808                "1000".to_string(),
1809                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1810            ),
1811            LabeledAnomaly::new(
1812                "ANO002".to_string(),
1813                AnomalyType::Error(ErrorType::DuplicateEntry),
1814                "JE002".to_string(),
1815                "JE".to_string(),
1816                "1000".to_string(),
1817                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
1818            ),
1819        ];
1820
1821        let summary = AnomalySummary::from_anomalies(&anomalies);
1822
1823        assert_eq!(summary.total_count, 2);
1824        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
1825        assert_eq!(summary.by_category.get("Error"), Some(&1));
1826    }
1827
1828    #[test]
1829    fn test_rate_config_validation() {
1830        let config = AnomalyRateConfig::default();
1831        assert!(config.validate().is_ok());
1832
1833        let bad_config = AnomalyRateConfig {
1834            fraud_rate: 0.5,
1835            error_rate: 0.5,
1836            process_issue_rate: 0.5, // Sum > 1.0
1837            ..Default::default()
1838        };
1839        assert!(bad_config.validate().is_err());
1840    }
1841
1842    #[test]
1843    fn test_injection_strategy_serialization() {
1844        let strategy = InjectionStrategy::SoDViolation {
1845            duty1: "CreatePO".to_string(),
1846            duty2: "ApprovePO".to_string(),
1847            violating_user: "USER001".to_string(),
1848        };
1849
1850        let json = serde_json::to_string(&strategy).unwrap();
1851        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
1852
1853        assert_eq!(strategy, deserialized);
1854    }
1855
1856    #[test]
1857    fn test_labeled_anomaly_serialization_with_provenance() {
1858        let anomaly = LabeledAnomaly::new(
1859            "ANO001".to_string(),
1860            AnomalyType::Fraud(FraudType::SelfApproval),
1861            "JE001".to_string(),
1862            "JE".to_string(),
1863            "1000".to_string(),
1864            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1865        )
1866        .with_run_id("run-123")
1867        .with_generation_seed(42)
1868        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
1869
1870        let json = serde_json::to_string(&anomaly).unwrap();
1871        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
1872
1873        assert_eq!(anomaly.run_id, deserialized.run_id);
1874        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
1875    }
1876
1877    // ========================================
1878    // FR-003 ENHANCED TAXONOMY TESTS
1879    // ========================================
1880
1881    #[test]
1882    fn test_anomaly_category_from_anomaly_type() {
1883        // Fraud mappings
1884        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
1885        assert_eq!(
1886            AnomalyCategory::from_anomaly_type(&fraud_vendor),
1887            AnomalyCategory::FictitiousVendor
1888        );
1889
1890        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
1891        assert_eq!(
1892            AnomalyCategory::from_anomaly_type(&fraud_kickback),
1893            AnomalyCategory::VendorKickback
1894        );
1895
1896        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
1897        assert_eq!(
1898            AnomalyCategory::from_anomaly_type(&fraud_structured),
1899            AnomalyCategory::StructuredTransaction
1900        );
1901
1902        // Error mappings
1903        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
1904        assert_eq!(
1905            AnomalyCategory::from_anomaly_type(&error_duplicate),
1906            AnomalyCategory::DuplicatePayment
1907        );
1908
1909        // Process issue mappings
1910        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
1911        assert_eq!(
1912            AnomalyCategory::from_anomaly_type(&process_skip),
1913            AnomalyCategory::MissingApproval
1914        );
1915
1916        // Relational mappings
1917        let relational_circular =
1918            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
1919        assert_eq!(
1920            AnomalyCategory::from_anomaly_type(&relational_circular),
1921            AnomalyCategory::CircularFlow
1922        );
1923    }
1924
1925    #[test]
1926    fn test_anomaly_category_ordinal() {
1927        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
1928        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
1929        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
1930    }
1931
1932    #[test]
1933    fn test_contributing_factor() {
1934        let factor = ContributingFactor::new(
1935            FactorType::AmountDeviation,
1936            15000.0,
1937            10000.0,
1938            true,
1939            0.5,
1940            "Amount exceeds threshold",
1941        );
1942
1943        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
1944        assert_eq!(factor.value, 15000.0);
1945        assert_eq!(factor.threshold, 10000.0);
1946        assert!(factor.direction_greater);
1947
1948        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
1949        let contribution = factor.contribution();
1950        assert!((contribution - 0.25).abs() < 0.01);
1951    }
1952
1953    #[test]
1954    fn test_contributing_factor_with_evidence() {
1955        let mut data = HashMap::new();
1956        data.insert("expected".to_string(), "10000".to_string());
1957        data.insert("actual".to_string(), "15000".to_string());
1958
1959        let factor = ContributingFactor::new(
1960            FactorType::AmountDeviation,
1961            15000.0,
1962            10000.0,
1963            true,
1964            0.5,
1965            "Amount deviation detected",
1966        )
1967        .with_evidence("transaction_history", data);
1968
1969        assert!(factor.evidence.is_some());
1970        let evidence = factor.evidence.unwrap();
1971        assert_eq!(evidence.source, "transaction_history");
1972        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
1973    }
1974
1975    #[test]
1976    fn test_enhanced_anomaly_label() {
1977        let base = LabeledAnomaly::new(
1978            "ANO001".to_string(),
1979            AnomalyType::Fraud(FraudType::DuplicatePayment),
1980            "JE001".to_string(),
1981            "JE".to_string(),
1982            "1000".to_string(),
1983            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
1984        );
1985
1986        let enhanced = EnhancedAnomalyLabel::from_base(base)
1987            .with_confidence(0.85)
1988            .with_severity(0.7)
1989            .with_factor(ContributingFactor::new(
1990                FactorType::DuplicateIndicator,
1991                1.0,
1992                0.5,
1993                true,
1994                0.4,
1995                "Duplicate payment detected",
1996            ))
1997            .with_secondary_category(AnomalyCategory::StructuredTransaction);
1998
1999        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
2000        assert_eq!(enhanced.enhanced_confidence, 0.85);
2001        assert_eq!(enhanced.enhanced_severity, 0.7);
2002        assert_eq!(enhanced.contributing_factors.len(), 1);
2003        assert_eq!(enhanced.secondary_categories.len(), 1);
2004    }
2005
2006    #[test]
2007    fn test_enhanced_anomaly_label_features() {
2008        let base = LabeledAnomaly::new(
2009            "ANO001".to_string(),
2010            AnomalyType::Fraud(FraudType::SelfApproval),
2011            "JE001".to_string(),
2012            "JE".to_string(),
2013            "1000".to_string(),
2014            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
2015        );
2016
2017        let enhanced = EnhancedAnomalyLabel::from_base(base)
2018            .with_confidence(0.9)
2019            .with_severity(0.8)
2020            .with_factor(ContributingFactor::new(
2021                FactorType::ControlBypass,
2022                1.0,
2023                0.0,
2024                true,
2025                0.5,
2026                "Control bypass detected",
2027            ));
2028
2029        let features = enhanced.to_features();
2030
2031        // Should have 25 features (15 base + 10 enhanced)
2032        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
2033        assert_eq!(features.len(), 25);
2034
2035        // Check enhanced confidence is in features
2036        assert_eq!(features[15], 0.9); // enhanced_confidence
2037
2038        // Check has_control_bypass flag
2039        assert_eq!(features[21], 1.0); // has_control_bypass
2040    }
2041
2042    #[test]
2043    fn test_enhanced_anomaly_label_feature_names() {
2044        let names = EnhancedAnomalyLabel::feature_names();
2045        assert_eq!(names.len(), 25);
2046        assert!(names.contains(&"enhanced_confidence"));
2047        assert!(names.contains(&"enhanced_severity"));
2048        assert!(names.contains(&"has_control_bypass"));
2049    }
2050
2051    #[test]
2052    fn test_factor_type_names() {
2053        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
2054        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
2055        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
2056    }
2057
2058    #[test]
2059    fn test_anomaly_category_serialization() {
2060        let category = AnomalyCategory::CircularFlow;
2061        let json = serde_json::to_string(&category).unwrap();
2062        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
2063        assert_eq!(category, deserialized);
2064
2065        let custom = AnomalyCategory::Custom("custom_type".to_string());
2066        let json = serde_json::to_string(&custom).unwrap();
2067        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
2068        assert_eq!(custom, deserialized);
2069    }
2070
2071    #[test]
2072    fn test_enhanced_label_secondary_category_dedup() {
2073        let base = LabeledAnomaly::new(
2074            "ANO001".to_string(),
2075            AnomalyType::Fraud(FraudType::DuplicatePayment),
2076            "JE001".to_string(),
2077            "JE".to_string(),
2078            "1000".to_string(),
2079            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
2080        );
2081
2082        let enhanced = EnhancedAnomalyLabel::from_base(base)
2083            // Try to add the primary category as secondary (should be ignored)
2084            .with_secondary_category(AnomalyCategory::DuplicatePayment)
2085            // Add a valid secondary
2086            .with_secondary_category(AnomalyCategory::TimingAnomaly)
2087            // Try to add duplicate secondary (should be ignored)
2088            .with_secondary_category(AnomalyCategory::TimingAnomaly);
2089
2090        // Should only have 1 secondary category (TimingAnomaly)
2091        assert_eq!(enhanced.secondary_categories.len(), 1);
2092        assert_eq!(
2093            enhanced.secondary_categories[0],
2094            AnomalyCategory::TimingAnomaly
2095        );
2096    }
2097
2098    // ==========================================================================
2099    // Accounting Standards Fraud Type Tests
2100    // ==========================================================================
2101
2102    #[test]
2103    fn test_revenue_recognition_fraud_types() {
2104        // Test ASC 606/IFRS 15 related fraud types
2105        let fraud_types = [
2106            FraudType::ImproperRevenueRecognition,
2107            FraudType::ImproperPoAllocation,
2108            FraudType::VariableConsiderationManipulation,
2109            FraudType::ContractModificationMisstatement,
2110        ];
2111
2112        for fraud_type in fraud_types {
2113            let anomaly_type = AnomalyType::Fraud(fraud_type);
2114            assert_eq!(anomaly_type.category(), "Fraud");
2115            assert!(anomaly_type.is_intentional());
2116            assert!(anomaly_type.severity() >= 3);
2117        }
2118    }
2119
2120    #[test]
2121    fn test_lease_accounting_fraud_types() {
2122        // Test ASC 842/IFRS 16 related fraud types
2123        let fraud_types = [
2124            FraudType::LeaseClassificationManipulation,
2125            FraudType::OffBalanceSheetLease,
2126            FraudType::LeaseLiabilityUnderstatement,
2127            FraudType::RouAssetMisstatement,
2128        ];
2129
2130        for fraud_type in fraud_types {
2131            let anomaly_type = AnomalyType::Fraud(fraud_type);
2132            assert_eq!(anomaly_type.category(), "Fraud");
2133            assert!(anomaly_type.is_intentional());
2134            assert!(anomaly_type.severity() >= 3);
2135        }
2136
2137        // Off-balance sheet lease fraud should be high severity
2138        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
2139    }
2140
2141    #[test]
2142    fn test_fair_value_fraud_types() {
2143        // Test ASC 820/IFRS 13 related fraud types
2144        let fraud_types = [
2145            FraudType::FairValueHierarchyManipulation,
2146            FraudType::Level3InputManipulation,
2147            FraudType::ValuationTechniqueManipulation,
2148        ];
2149
2150        for fraud_type in fraud_types {
2151            let anomaly_type = AnomalyType::Fraud(fraud_type);
2152            assert_eq!(anomaly_type.category(), "Fraud");
2153            assert!(anomaly_type.is_intentional());
2154            assert!(anomaly_type.severity() >= 4);
2155        }
2156
2157        // Level 3 manipulation is highest severity (unobservable inputs)
2158        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
2159    }
2160
2161    #[test]
2162    fn test_impairment_fraud_types() {
2163        // Test ASC 360/IAS 36 related fraud types
2164        let fraud_types = [
2165            FraudType::DelayedImpairment,
2166            FraudType::ImpairmentTestAvoidance,
2167            FraudType::CashFlowProjectionManipulation,
2168            FraudType::ImproperImpairmentReversal,
2169        ];
2170
2171        for fraud_type in fraud_types {
2172            let anomaly_type = AnomalyType::Fraud(fraud_type);
2173            assert_eq!(anomaly_type.category(), "Fraud");
2174            assert!(anomaly_type.is_intentional());
2175            assert!(anomaly_type.severity() >= 3);
2176        }
2177
2178        // Cash flow manipulation has highest severity
2179        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
2180    }
2181
2182    // ==========================================================================
2183    // Accounting Standards Error Type Tests
2184    // ==========================================================================
2185
2186    #[test]
2187    fn test_standards_error_types() {
2188        // Test non-fraudulent accounting standards errors
2189        let error_types = [
2190            ErrorType::RevenueTimingError,
2191            ErrorType::PoAllocationError,
2192            ErrorType::LeaseClassificationError,
2193            ErrorType::LeaseCalculationError,
2194            ErrorType::FairValueError,
2195            ErrorType::ImpairmentCalculationError,
2196            ErrorType::DiscountRateError,
2197            ErrorType::FrameworkApplicationError,
2198        ];
2199
2200        for error_type in error_types {
2201            let anomaly_type = AnomalyType::Error(error_type);
2202            assert_eq!(anomaly_type.category(), "Error");
2203            assert!(!anomaly_type.is_intentional());
2204            assert!(anomaly_type.severity() >= 3);
2205        }
2206    }
2207
2208    #[test]
2209    fn test_framework_application_error() {
2210        // Test IFRS vs GAAP confusion errors
2211        let error_type = ErrorType::FrameworkApplicationError;
2212        assert_eq!(error_type.severity(), 4);
2213
2214        let anomaly = LabeledAnomaly::new(
2215            "ERR001".to_string(),
2216            AnomalyType::Error(error_type),
2217            "JE100".to_string(),
2218            "JE".to_string(),
2219            "1000".to_string(),
2220            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
2221        )
2222        .with_description("LIFO inventory method used under IFRS (not permitted)")
2223        .with_metadata("framework", "IFRS")
2224        .with_metadata("standard_violated", "IAS 2");
2225
2226        assert_eq!(anomaly.anomaly_type.category(), "Error");
2227        assert_eq!(
2228            anomaly.metadata.get("standard_violated"),
2229            Some(&"IAS 2".to_string())
2230        );
2231    }
2232
2233    #[test]
2234    fn test_standards_anomaly_serialization() {
2235        // Test that new fraud types serialize/deserialize correctly
2236        let fraud_types = [
2237            FraudType::ImproperRevenueRecognition,
2238            FraudType::LeaseClassificationManipulation,
2239            FraudType::FairValueHierarchyManipulation,
2240            FraudType::DelayedImpairment,
2241        ];
2242
2243        for fraud_type in fraud_types {
2244            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
2245            let deserialized: FraudType =
2246                serde_json::from_str(&json).expect("Failed to deserialize");
2247            assert_eq!(fraud_type, deserialized);
2248        }
2249
2250        // Test error types
2251        let error_types = [
2252            ErrorType::RevenueTimingError,
2253            ErrorType::LeaseCalculationError,
2254            ErrorType::FairValueError,
2255            ErrorType::FrameworkApplicationError,
2256        ];
2257
2258        for error_type in error_types {
2259            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
2260            let deserialized: ErrorType =
2261                serde_json::from_str(&json).expect("Failed to deserialize");
2262            assert_eq!(error_type, deserialized);
2263        }
2264    }
2265
2266    #[test]
2267    fn test_standards_labeled_anomaly() {
2268        // Test creating a labeled anomaly for a standards violation
2269        let anomaly = LabeledAnomaly::new(
2270            "STD001".to_string(),
2271            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
2272            "CONTRACT-2024-001".to_string(),
2273            "Revenue".to_string(),
2274            "1000".to_string(),
2275            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
2276        )
2277        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
2278        .with_monetary_impact(dec!(500000))
2279        .with_metadata("standard", "ASC 606")
2280        .with_metadata("paragraph", "606-10-25-1")
2281        .with_metadata("contract_id", "C-2024-001")
2282        .with_related_entity("CONTRACT-2024-001")
2283        .with_related_entity("CUSTOMER-500");
2284
2285        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
2286        assert!(anomaly.is_injected);
2287        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
2288        assert_eq!(anomaly.related_entities.len(), 2);
2289        assert_eq!(
2290            anomaly.metadata.get("standard"),
2291            Some(&"ASC 606".to_string())
2292        );
2293    }
2294}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs