1use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
/// Structured reason recorded for why an anomaly exists.
///
/// Stored on [`LabeledAnomaly`] through its optional `causal_reason` field.
/// Every variant is a struct-like variant carrying the identifiers needed for
/// that reason. The per-variant notes below are read off the variant and field
/// names; the code that constructs these values is not part of this file
/// section, so treat them as descriptions to confirm rather than a contract.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnomalyCausalReason {
    /// Reason associated with a base rate.
    RandomRate {
        /// The rate value recorded for this reason.
        base_rate: f64,
    },
    /// Reason associated with a named temporal pattern.
    TemporalPattern {
        /// Name of the pattern.
        pattern_name: String,
    },
    /// Reason associated with a specific targeted entity.
    EntityTargeting {
        /// Kind of entity that was targeted.
        target_type: String,
        /// Identifier of the targeted entity.
        target_id: String,
    },
    /// Reason associated with membership in a cluster.
    ClusterMembership {
        /// Identifier of the cluster.
        cluster_id: String,
    },
    /// Reason associated with one step of a scenario.
    ScenarioStep {
        /// Kind of scenario.
        scenario_type: String,
        /// Step number within the scenario (numbering base not shown here).
        step_number: u32,
    },
    /// Reason associated with a data-quality profile.
    DataQualityProfile {
        /// Name of the profile.
        profile: String,
    },
    /// Reason associated with balancing classes for ML training.
    MLTrainingBalance {
        /// The class the balancing was aimed at.
        target_class: String,
    },
}
60
/// Structured description of how an anomaly was injected, with the parameters
/// captured for each technique.
///
/// Stored on [`LabeledAnomaly`] through its optional `structured_strategy`
/// field. `InjectionStrategy::description` renders a one-line summary and
/// `InjectionStrategy::strategy_type` returns the variant name.
/// Field notes below are read off the field names and the way
/// `description` formats them.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum InjectionStrategy {
    /// An amount was changed by a multiplicative factor.
    AmountManipulation {
        /// Amount before manipulation.
        original: Decimal,
        /// Factor reported by `description` as "Amount multiplied by …".
        factor: f64,
    },
    /// An amount was adjusted in relation to a threshold.
    ThresholdAvoidance {
        /// The threshold being avoided.
        threshold: Decimal,
        /// The amount after adjustment.
        adjusted_amount: Decimal,
    },
    /// A date was shifted.
    DateShift {
        /// Signed shift in days; `description` reports negative values as
        /// backdating and non-negative values as forward-dating.
        days_shifted: i32,
        /// Date before the shift.
        original_date: NaiveDate,
    },
    /// A user approved their own item.
    SelfApproval {
        /// Identifier of the user.
        user_id: String,
    },
    /// Segregation-of-duties violation involving two duties.
    SoDViolation {
        /// First duty involved.
        duty1: String,
        /// Second duty involved.
        duty2: String,
        /// Identifier of the user involved in the violation.
        violating_user: String,
    },
    /// An exact duplicate of another document.
    ExactDuplicate {
        /// Identifier of the duplicated document.
        original_doc_id: String,
    },
    /// A duplicate of another document with some fields varied.
    NearDuplicate {
        /// Identifier of the duplicated document.
        original_doc_id: String,
        /// Names of the fields that differ from the original.
        varied_fields: Vec<String>,
    },
    /// A flow that passes through a chain of entities.
    CircularFlow {
        /// The entities in the chain; `description` reports its length.
        entity_chain: Vec<String>,
    },
    /// One transaction split into several.
    SplitTransaction {
        /// Amount before the split.
        original_amount: Decimal,
        /// Number of resulting transactions.
        split_count: u32,
        /// Identifiers of the resulting documents.
        split_doc_ids: Vec<String>,
    },
    /// An amount was rounded.
    RoundNumbering {
        /// Amount before rounding.
        original_amount: Decimal,
        /// Amount after rounding.
        rounded_amount: Decimal,
    },
    /// The timing of an item was manipulated.
    TimingManipulation {
        /// Free-form label for the kind of timing manipulation.
        timing_type: String,
        /// Time before manipulation, when recorded.
        original_time: Option<NaiveDateTime>,
    },
    /// An item was booked to a different account than the correct one.
    AccountMisclassification {
        /// The account that should have been used.
        correct_account: String,
        /// The account that was used instead.
        incorrect_account: String,
    },
    /// A field was left out.
    MissingField {
        /// Name of the missing field.
        field_name: String,
    },
    /// Any strategy not covered by the other variants.
    Custom {
        /// Name of the custom strategy.
        name: String,
        /// Free-form string parameters for the custom strategy.
        parameters: HashMap<String, String>,
    },
}
162
163impl InjectionStrategy {
164 pub fn description(&self) -> String {
166 match self {
167 InjectionStrategy::AmountManipulation { factor, .. } => {
168 format!("Amount multiplied by {:.2}", factor)
169 }
170 InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171 format!("Amount adjusted to avoid {} threshold", threshold)
172 }
173 InjectionStrategy::DateShift { days_shifted, .. } => {
174 if *days_shifted < 0 {
175 format!("Date backdated by {} days", days_shifted.abs())
176 } else {
177 format!("Date forward-dated by {} days", days_shifted)
178 }
179 }
180 InjectionStrategy::SelfApproval { user_id } => {
181 format!("Self-approval by user {}", user_id)
182 }
183 InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184 format!("SoD violation: {} and {}", duty1, duty2)
185 }
186 InjectionStrategy::ExactDuplicate { original_doc_id } => {
187 format!("Exact duplicate of {}", original_doc_id)
188 }
189 InjectionStrategy::NearDuplicate {
190 original_doc_id,
191 varied_fields,
192 } => {
193 format!(
194 "Near-duplicate of {} (varied: {:?})",
195 original_doc_id, varied_fields
196 )
197 }
198 InjectionStrategy::CircularFlow { entity_chain } => {
199 format!("Circular flow through {} entities", entity_chain.len())
200 }
201 InjectionStrategy::SplitTransaction { split_count, .. } => {
202 format!("Split into {} transactions", split_count)
203 }
204 InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
205 InjectionStrategy::TimingManipulation { timing_type, .. } => {
206 format!("Timing manipulation: {}", timing_type)
207 }
208 InjectionStrategy::AccountMisclassification {
209 correct_account,
210 incorrect_account,
211 } => {
212 format!(
213 "Misclassified from {} to {}",
214 correct_account, incorrect_account
215 )
216 }
217 InjectionStrategy::MissingField { field_name } => {
218 format!("Missing required field: {}", field_name)
219 }
220 InjectionStrategy::Custom { name, .. } => format!("Custom: {}", name),
221 }
222 }
223
224 pub fn strategy_type(&self) -> &'static str {
226 match self {
227 InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
228 InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
229 InjectionStrategy::DateShift { .. } => "DateShift",
230 InjectionStrategy::SelfApproval { .. } => "SelfApproval",
231 InjectionStrategy::SoDViolation { .. } => "SoDViolation",
232 InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
233 InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
234 InjectionStrategy::CircularFlow { .. } => "CircularFlow",
235 InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
236 InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
237 InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
238 InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
239 InjectionStrategy::MissingField { .. } => "MissingField",
240 InjectionStrategy::Custom { .. } => "Custom",
241 }
242 }
243}
244
/// Top-level anomaly classification: one of five typed families or a
/// free-form custom label.
///
/// Each typed variant wraps the sub-type enum for its family. The methods on
/// this type expose the family name (`category`), the sub-type name
/// (`type_name`), a severity (`severity`) and whether the anomaly counts as
/// intentional (`is_intentional`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyType {
    /// Fraud family; the only family reported as intentional.
    Fraud(FraudType),
    /// Error family.
    Error(ErrorType),
    /// Process-issue family.
    ProcessIssue(ProcessIssueType),
    /// Statistical-anomaly family.
    Statistical(StatisticalAnomalyType),
    /// Relational-anomaly family.
    Relational(RelationalAnomalyType),
    /// Free-form anomaly identified only by the contained label.
    Custom(String),
}
261
262impl AnomalyType {
263 pub fn category(&self) -> &'static str {
265 match self {
266 AnomalyType::Fraud(_) => "Fraud",
267 AnomalyType::Error(_) => "Error",
268 AnomalyType::ProcessIssue(_) => "ProcessIssue",
269 AnomalyType::Statistical(_) => "Statistical",
270 AnomalyType::Relational(_) => "Relational",
271 AnomalyType::Custom(_) => "Custom",
272 }
273 }
274
275 pub fn type_name(&self) -> String {
277 match self {
278 AnomalyType::Fraud(t) => format!("{:?}", t),
279 AnomalyType::Error(t) => format!("{:?}", t),
280 AnomalyType::ProcessIssue(t) => format!("{:?}", t),
281 AnomalyType::Statistical(t) => format!("{:?}", t),
282 AnomalyType::Relational(t) => format!("{:?}", t),
283 AnomalyType::Custom(s) => s.clone(),
284 }
285 }
286
287 pub fn severity(&self) -> u8 {
289 match self {
290 AnomalyType::Fraud(t) => t.severity(),
291 AnomalyType::Error(t) => t.severity(),
292 AnomalyType::ProcessIssue(t) => t.severity(),
293 AnomalyType::Statistical(t) => t.severity(),
294 AnomalyType::Relational(t) => t.severity(),
295 AnomalyType::Custom(_) => 3,
296 }
297 }
298
299 pub fn is_intentional(&self) -> bool {
301 matches!(self, AnomalyType::Fraud(_))
302 }
303}
304
/// Fraud anomaly sub-types; the payload of `AnomalyType::Fraud`.
///
/// A field-less `Copy` enum. Per-variant severity is defined by
/// `FraudType::severity`. The blank-line groups follow the source layout;
/// the group headings are inferred from the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FraudType {
    // Entry / transaction-level manipulation.
    FictitiousEntry,
    FictitiousTransaction,
    RoundDollarManipulation,
    JustBelowThreshold,
    RevenueManipulation,
    ImproperCapitalization,
    ExpenseCapitalization,
    ReserveManipulation,
    SuspenseAccountAbuse,
    SplitTransaction,
    TimingAnomaly,
    UnauthorizedAccess,

    // Approval-related fraud.
    SelfApproval,
    ExceededApprovalLimit,
    SegregationOfDutiesViolation,
    UnauthorizedApproval,
    CollusiveApproval,

    // Vendor / payment fraud.
    FictitiousVendor,
    DuplicatePayment,
    ShellCompanyPayment,
    Kickback,
    KickbackScheme,
    InvoiceManipulation,

    // Asset and payroll fraud.
    AssetMisappropriation,
    InventoryTheft,
    GhostEmployee,

    // Financial-statement fraud.
    PrematureRevenue,
    UnderstatedLiabilities,
    OverstatedAssets,
    ChannelStuffing,

    // Revenue-recognition fraud.
    ImproperRevenueRecognition,
    ImproperPoAllocation,
    VariableConsiderationManipulation,
    ContractModificationMisstatement,

    // Lease-accounting fraud.
    LeaseClassificationManipulation,
    OffBalanceSheetLease,
    LeaseLiabilityUnderstatement,
    RouAssetMisstatement,

    // Fair-value measurement fraud.
    FairValueHierarchyManipulation,
    Level3InputManipulation,
    ValuationTechniqueManipulation,

    // Impairment fraud.
    DelayedImpairment,
    ImpairmentTestAvoidance,
    CashFlowProjectionManipulation,
    ImproperImpairmentReversal,
}
416
417impl FraudType {
418 pub fn severity(&self) -> u8 {
420 match self {
421 FraudType::RoundDollarManipulation => 2,
422 FraudType::JustBelowThreshold => 3,
423 FraudType::SelfApproval => 3,
424 FraudType::ExceededApprovalLimit => 3,
425 FraudType::DuplicatePayment => 3,
426 FraudType::FictitiousEntry => 4,
427 FraudType::RevenueManipulation => 5,
428 FraudType::FictitiousVendor => 5,
429 FraudType::ShellCompanyPayment => 5,
430 FraudType::AssetMisappropriation => 5,
431 FraudType::SegregationOfDutiesViolation => 4,
432 FraudType::CollusiveApproval => 5,
433 FraudType::ImproperRevenueRecognition => 5,
435 FraudType::ImproperPoAllocation => 4,
436 FraudType::VariableConsiderationManipulation => 4,
437 FraudType::ContractModificationMisstatement => 3,
438 FraudType::LeaseClassificationManipulation => 4,
440 FraudType::OffBalanceSheetLease => 5,
441 FraudType::LeaseLiabilityUnderstatement => 4,
442 FraudType::RouAssetMisstatement => 3,
443 FraudType::FairValueHierarchyManipulation => 4,
445 FraudType::Level3InputManipulation => 5,
446 FraudType::ValuationTechniqueManipulation => 4,
447 FraudType::DelayedImpairment => 4,
449 FraudType::ImpairmentTestAvoidance => 4,
450 FraudType::CashFlowProjectionManipulation => 5,
451 FraudType::ImproperImpairmentReversal => 3,
452 _ => 4,
453 }
454 }
455}
456
/// Error anomaly sub-types; the payload of `AnomalyType::Error`.
///
/// A field-less `Copy` enum. Per-variant severity is defined by
/// `ErrorType::severity`. The blank-line groups follow the source layout;
/// the group headings are inferred from the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ErrorType {
    // Data-entry errors.
    DuplicateEntry,
    ReversedAmount,
    TransposedDigits,
    DecimalError,
    MissingField,
    InvalidAccount,

    // Date / period errors.
    WrongPeriod,
    BackdatedEntry,
    FutureDatedEntry,
    CutoffError,

    // Classification errors.
    MisclassifiedAccount,
    WrongCostCenter,
    WrongCompanyCode,

    // Calculation errors.
    UnbalancedEntry,
    RoundingError,
    CurrencyError,
    TaxCalculationError,

    // Accounting-standard application errors.
    RevenueTimingError,
    PoAllocationError,
    LeaseClassificationError,
    LeaseCalculationError,
    FairValueError,
    ImpairmentCalculationError,
    DiscountRateError,
    FrameworkApplicationError,
}
520
521impl ErrorType {
522 pub fn severity(&self) -> u8 {
524 match self {
525 ErrorType::RoundingError => 1,
526 ErrorType::MissingField => 2,
527 ErrorType::TransposedDigits => 2,
528 ErrorType::DecimalError => 3,
529 ErrorType::DuplicateEntry => 3,
530 ErrorType::ReversedAmount => 3,
531 ErrorType::WrongPeriod => 4,
532 ErrorType::UnbalancedEntry => 5,
533 ErrorType::CurrencyError => 4,
534 ErrorType::RevenueTimingError => 4,
536 ErrorType::PoAllocationError => 3,
537 ErrorType::LeaseClassificationError => 3,
538 ErrorType::LeaseCalculationError => 3,
539 ErrorType::FairValueError => 4,
540 ErrorType::ImpairmentCalculationError => 4,
541 ErrorType::DiscountRateError => 3,
542 ErrorType::FrameworkApplicationError => 4,
543 _ => 3,
544 }
545 }
546}
547
/// Process-issue anomaly sub-types; the payload of `AnomalyType::ProcessIssue`.
///
/// A field-less `Copy` enum. Per-variant severity is defined by
/// `ProcessIssueType::severity`. The blank-line groups follow the source
/// layout; the group headings are inferred from the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ProcessIssueType {
    // Approval issues.
    SkippedApproval,
    LateApproval,
    MissingDocumentation,
    IncompleteApprovalChain,

    // Posting-time issues.
    LatePosting,
    AfterHoursPosting,
    WeekendPosting,
    RushedPeriodEnd,

    // Control / system issues.
    ManualOverride,
    UnusualAccess,
    SystemBypass,
    BatchAnomaly,

    // Documentation / audit-trail issues.
    VagueDescription,
    PostFactoChange,
    IncompleteAuditTrail,
}
589
590impl ProcessIssueType {
591 pub fn severity(&self) -> u8 {
593 match self {
594 ProcessIssueType::VagueDescription => 1,
595 ProcessIssueType::LatePosting => 2,
596 ProcessIssueType::AfterHoursPosting => 2,
597 ProcessIssueType::WeekendPosting => 2,
598 ProcessIssueType::SkippedApproval => 4,
599 ProcessIssueType::ManualOverride => 4,
600 ProcessIssueType::SystemBypass => 5,
601 ProcessIssueType::IncompleteAuditTrail => 4,
602 _ => 3,
603 }
604 }
605}
606
/// Statistical anomaly sub-types; the payload of `AnomalyType::Statistical`.
///
/// A field-less `Copy` enum. Per-variant severity is defined by
/// `StatisticalAnomalyType::severity`. The blank-line groups follow the
/// source layout; the group headings are inferred from the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum StatisticalAnomalyType {
    // Amount anomalies.
    UnusuallyHighAmount,
    UnusuallyLowAmount,
    BenfordViolation,
    ExactDuplicateAmount,
    RepeatingAmount,

    // Frequency / timing anomalies.
    UnusualFrequency,
    TransactionBurst,
    UnusualTiming,

    // Trend anomalies.
    TrendBreak,
    LevelShift,
    SeasonalAnomaly,

    // Distribution anomalies.
    StatisticalOutlier,
    VarianceChange,
    DistributionShift,
}
646
647impl StatisticalAnomalyType {
648 pub fn severity(&self) -> u8 {
650 match self {
651 StatisticalAnomalyType::UnusualTiming => 1,
652 StatisticalAnomalyType::UnusualFrequency => 2,
653 StatisticalAnomalyType::BenfordViolation => 2,
654 StatisticalAnomalyType::UnusuallyHighAmount => 3,
655 StatisticalAnomalyType::TrendBreak => 3,
656 StatisticalAnomalyType::TransactionBurst => 4,
657 StatisticalAnomalyType::ExactDuplicateAmount => 3,
658 _ => 3,
659 }
660 }
661}
662
/// Relational anomaly sub-types; the payload of `AnomalyType::Relational`.
///
/// A field-less `Copy` enum. Per-variant severity is defined by
/// `RelationalAnomalyType::severity`. The blank-line groups follow the
/// source layout; the group headings are inferred from the variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RelationalAnomalyType {
    // Transaction-pattern anomalies.
    CircularTransaction,
    UnusualAccountPair,
    NewCounterparty,
    DormantAccountActivity,

    // Network-structure anomalies.
    CentralityAnomaly,
    IsolatedCluster,
    BridgeNodeAnomaly,
    CommunityAnomaly,

    // Relationship anomalies.
    MissingRelationship,
    UnexpectedRelationship,
    RelationshipStrengthChange,

    // Intercompany anomalies.
    UnmatchedIntercompany,
    CircularIntercompany,
    TransferPricingAnomaly,
}
702
703impl RelationalAnomalyType {
704 pub fn severity(&self) -> u8 {
706 match self {
707 RelationalAnomalyType::NewCounterparty => 1,
708 RelationalAnomalyType::DormantAccountActivity => 2,
709 RelationalAnomalyType::UnusualAccountPair => 2,
710 RelationalAnomalyType::CircularTransaction => 4,
711 RelationalAnomalyType::CircularIntercompany => 4,
712 RelationalAnomalyType::TransferPricingAnomaly => 4,
713 RelationalAnomalyType::UnmatchedIntercompany => 3,
714 _ => 3,
715 }
716 }
717}
718
/// A single anomaly label attached to a document.
///
/// Built with `LabeledAnomaly::new` and refined through the `with_*` builder
/// methods. The trailing block of fields is optional in serialized form:
/// each is skipped on output when empty and defaulted on input when absent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabeledAnomaly {
    /// Identifier of this anomaly label.
    pub anomaly_id: String,
    /// Classification of the anomaly.
    pub anomaly_type: AnomalyType,
    /// Identifier of the affected document.
    pub document_id: String,
    /// Kind of the affected document (free-form string).
    pub document_type: String,
    /// Company code the document belongs to.
    pub company_code: String,
    /// Date associated with the anomaly.
    pub anomaly_date: NaiveDate,
    /// Timestamp set by `new` to the local time at construction.
    pub detection_timestamp: NaiveDateTime,
    /// Confidence value; `new` initializes it to `1.0`.
    pub confidence: f64,
    /// Severity; `new` initializes it from `anomaly_type.severity()`.
    /// `to_features` normalizes it by dividing by 5.
    pub severity: u8,
    /// Human-readable description; `new` generates a default one.
    pub description: String,
    /// Identifiers of entities related to the anomaly.
    pub related_entities: Vec<String>,
    /// Monetary impact, when known.
    pub monetary_impact: Option<Decimal>,
    /// Free-form string metadata.
    pub metadata: HashMap<String, String>,
    /// Whether the anomaly was injected; `new` sets this to `true`.
    pub is_injected: bool,
    /// Name of the injection strategy, when recorded. Also populated by
    /// `with_structured_strategy` with the strategy's variant name.
    pub injection_strategy: Option<String>,
    /// Identifier of the cluster this anomaly belongs to, if any.
    pub cluster_id: Option<String>,

    /// Hash of the original document, when recorded
    /// (hash algorithm is not specified in this file section).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_document_hash: Option<String>,

    /// Structured reason why the anomaly exists, when recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_reason: Option<AnomalyCausalReason>,

    /// Structured injection strategy with its parameters, when recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured_strategy: Option<InjectionStrategy>,

    /// Identifier of the parent anomaly, if this one is derived from another.
    /// `to_features` exposes its presence as the `is_derived` feature.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_anomaly_id: Option<String>,

    /// Identifiers of child anomalies; omitted from output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub child_anomaly_ids: Vec<String>,

    /// Identifier of the scenario this anomaly is part of, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scenario_id: Option<String>,

    /// Identifier of the generation run, when recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,

    /// Seed used for generation, when recorded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation_seed: Option<u64>,
}
796
797impl LabeledAnomaly {
798 pub fn new(
800 anomaly_id: String,
801 anomaly_type: AnomalyType,
802 document_id: String,
803 document_type: String,
804 company_code: String,
805 anomaly_date: NaiveDate,
806 ) -> Self {
807 let severity = anomaly_type.severity();
808 let description = format!(
809 "{} - {} in document {}",
810 anomaly_type.category(),
811 anomaly_type.type_name(),
812 document_id
813 );
814
815 Self {
816 anomaly_id,
817 anomaly_type,
818 document_id,
819 document_type,
820 company_code,
821 anomaly_date,
822 detection_timestamp: chrono::Local::now().naive_local(),
823 confidence: 1.0,
824 severity,
825 description,
826 related_entities: Vec::new(),
827 monetary_impact: None,
828 metadata: HashMap::new(),
829 is_injected: true,
830 injection_strategy: None,
831 cluster_id: None,
832 original_document_hash: None,
834 causal_reason: None,
835 structured_strategy: None,
836 parent_anomaly_id: None,
837 child_anomaly_ids: Vec::new(),
838 scenario_id: None,
839 run_id: None,
840 generation_seed: None,
841 }
842 }
843
844 pub fn with_description(mut self, description: &str) -> Self {
846 self.description = description.to_string();
847 self
848 }
849
850 pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
852 self.monetary_impact = Some(impact);
853 self
854 }
855
856 pub fn with_related_entity(mut self, entity: &str) -> Self {
858 self.related_entities.push(entity.to_string());
859 self
860 }
861
862 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
864 self.metadata.insert(key.to_string(), value.to_string());
865 self
866 }
867
868 pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
870 self.injection_strategy = Some(strategy.to_string());
871 self
872 }
873
874 pub fn with_cluster(mut self, cluster_id: &str) -> Self {
876 self.cluster_id = Some(cluster_id.to_string());
877 self
878 }
879
880 pub fn with_original_document_hash(mut self, hash: &str) -> Self {
886 self.original_document_hash = Some(hash.to_string());
887 self
888 }
889
890 pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
892 self.causal_reason = Some(reason);
893 self
894 }
895
896 pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
898 self.injection_strategy = Some(strategy.strategy_type().to_string());
900 self.structured_strategy = Some(strategy);
901 self
902 }
903
904 pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
906 self.parent_anomaly_id = Some(parent_id.to_string());
907 self
908 }
909
910 pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
912 self.child_anomaly_ids.push(child_id.to_string());
913 self
914 }
915
916 pub fn with_scenario(mut self, scenario_id: &str) -> Self {
918 self.scenario_id = Some(scenario_id.to_string());
919 self
920 }
921
922 pub fn with_run_id(mut self, run_id: &str) -> Self {
924 self.run_id = Some(run_id.to_string());
925 self
926 }
927
928 pub fn with_generation_seed(mut self, seed: u64) -> Self {
930 self.generation_seed = Some(seed);
931 self
932 }
933
934 pub fn with_provenance(
936 mut self,
937 run_id: Option<&str>,
938 seed: Option<u64>,
939 causal_reason: Option<AnomalyCausalReason>,
940 ) -> Self {
941 if let Some(id) = run_id {
942 self.run_id = Some(id.to_string());
943 }
944 self.generation_seed = seed;
945 self.causal_reason = causal_reason;
946 self
947 }
948
949 pub fn to_features(&self) -> Vec<f64> {
963 let mut features = Vec::new();
964
965 let categories = [
967 "Fraud",
968 "Error",
969 "ProcessIssue",
970 "Statistical",
971 "Relational",
972 "Custom",
973 ];
974 for cat in &categories {
975 features.push(if self.anomaly_type.category() == *cat {
976 1.0
977 } else {
978 0.0
979 });
980 }
981
982 features.push(self.severity as f64 / 5.0);
984
985 features.push(self.confidence);
987
988 features.push(if self.monetary_impact.is_some() {
990 1.0
991 } else {
992 0.0
993 });
994
995 if let Some(impact) = self.monetary_impact {
997 let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
998 features.push((impact_f64.abs() + 1.0).ln());
999 } else {
1000 features.push(0.0);
1001 }
1002
1003 features.push(if self.anomaly_type.is_intentional() {
1005 1.0
1006 } else {
1007 0.0
1008 });
1009
1010 features.push(self.related_entities.len() as f64);
1012
1013 features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1015
1016 features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1019
1020 features.push(if self.parent_anomaly_id.is_some() {
1022 1.0
1023 } else {
1024 0.0
1025 });
1026
1027 features
1028 }
1029
1030 pub fn feature_count() -> usize {
1032 15 }
1034
1035 pub fn feature_names() -> Vec<&'static str> {
1037 vec![
1038 "category_fraud",
1039 "category_error",
1040 "category_process_issue",
1041 "category_statistical",
1042 "category_relational",
1043 "category_custom",
1044 "severity_normalized",
1045 "confidence",
1046 "has_monetary_impact",
1047 "monetary_impact_log",
1048 "is_intentional",
1049 "related_entity_count",
1050 "is_clustered",
1051 "is_scenario_part",
1052 "is_derived",
1053 ]
1054 }
1055}
1056
/// Aggregate statistics over a collection of [`LabeledAnomaly`] values.
///
/// Populated by `AnomalySummary::from_anomalies`; the `Default` value is an
/// all-empty summary.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Number of anomalies summarized.
    pub total_count: usize,
    /// Count per `AnomalyType::category()` name.
    pub by_category: HashMap<String, usize>,
    /// Count per `AnomalyType::type_name()`.
    pub by_type: HashMap<String, usize>,
    /// Count per value of the label's `severity` field.
    pub by_severity: HashMap<u8, usize>,
    /// Count per company code.
    pub by_company: HashMap<String, usize>,
    /// Sum of all present `monetary_impact` values.
    pub total_monetary_impact: Decimal,
    /// Earliest and latest `anomaly_date`; `None` when there are no anomalies.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster identifiers seen.
    pub cluster_count: usize,
}
1077
1078impl AnomalySummary {
1079 pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1081 let mut summary = AnomalySummary {
1082 total_count: anomalies.len(),
1083 ..Default::default()
1084 };
1085
1086 let mut min_date: Option<NaiveDate> = None;
1087 let mut max_date: Option<NaiveDate> = None;
1088 let mut clusters = std::collections::HashSet::new();
1089
1090 for anomaly in anomalies {
1091 *summary
1093 .by_category
1094 .entry(anomaly.anomaly_type.category().to_string())
1095 .or_insert(0) += 1;
1096
1097 *summary
1099 .by_type
1100 .entry(anomaly.anomaly_type.type_name())
1101 .or_insert(0) += 1;
1102
1103 *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1105
1106 *summary
1108 .by_company
1109 .entry(anomaly.company_code.clone())
1110 .or_insert(0) += 1;
1111
1112 if let Some(impact) = anomaly.monetary_impact {
1114 summary.total_monetary_impact += impact;
1115 }
1116
1117 match min_date {
1119 None => min_date = Some(anomaly.anomaly_date),
1120 Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1121 _ => {}
1122 }
1123 match max_date {
1124 None => max_date = Some(anomaly.anomaly_date),
1125 Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1126 _ => {}
1127 }
1128
1129 if let Some(cluster_id) = &anomaly.cluster_id {
1131 clusters.insert(cluster_id.clone());
1132 }
1133 }
1134
1135 summary.date_range = min_date.zip(max_date);
1136 summary.cluster_count = clusters.len();
1137
1138 summary
1139 }
1140}
1141
/// Coarser category derived from an [`AnomalyType`].
///
/// `AnomalyCategory::from_anomaly_type` maps an anomaly type onto one of
/// these values; `name` gives a snake_case label and `ordinal` a stable
/// index from 0 to 14. Note that `from_anomaly_type` in this file never
/// produces `RelatedPartyVendor`. The blank-line groups follow the source
/// layout; the group headings are inferred from the variant names.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related.
    FictitiousVendor,
    VendorKickback,
    RelatedPartyVendor,

    // Transaction-related.
    DuplicatePayment,
    UnauthorizedTransaction,
    StructuredTransaction,

    // Pattern-based.
    CircularFlow,
    BehavioralAnomaly,
    TimingAnomaly,

    // Journal / control-related.
    JournalAnomaly,
    ManualOverride,
    MissingApproval,

    // Statistical.
    StatisticalOutlier,
    DistributionAnomaly,

    /// Free-form category carrying its own label; `name` returns the label
    /// and `ordinal` returns 14 for every custom value.
    Custom(String),
}
1194
1195impl AnomalyCategory {
1196 pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1198 match anomaly_type {
1199 AnomalyType::Fraud(fraud_type) => match fraud_type {
1200 FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1201 AnomalyCategory::FictitiousVendor
1202 }
1203 FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1204 FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1205 FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1206 AnomalyCategory::StructuredTransaction
1207 }
1208 FraudType::SelfApproval
1209 | FraudType::UnauthorizedApproval
1210 | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1211 FraudType::TimingAnomaly
1212 | FraudType::RoundDollarManipulation
1213 | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1214 _ => AnomalyCategory::BehavioralAnomaly,
1215 },
1216 AnomalyType::Error(error_type) => match error_type {
1217 ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1218 ErrorType::WrongPeriod
1219 | ErrorType::BackdatedEntry
1220 | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1221 _ => AnomalyCategory::JournalAnomaly,
1222 },
1223 AnomalyType::ProcessIssue(process_type) => match process_type {
1224 ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1225 AnomalyCategory::MissingApproval
1226 }
1227 ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1228 AnomalyCategory::ManualOverride
1229 }
1230 ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1231 AnomalyCategory::TimingAnomaly
1232 }
1233 _ => AnomalyCategory::BehavioralAnomaly,
1234 },
1235 AnomalyType::Statistical(stat_type) => match stat_type {
1236 StatisticalAnomalyType::BenfordViolation
1237 | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1238 _ => AnomalyCategory::StatisticalOutlier,
1239 },
1240 AnomalyType::Relational(rel_type) => match rel_type {
1241 RelationalAnomalyType::CircularTransaction
1242 | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1243 _ => AnomalyCategory::BehavioralAnomaly,
1244 },
1245 AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1246 }
1247 }
1248
1249 pub fn name(&self) -> &str {
1251 match self {
1252 AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1253 AnomalyCategory::VendorKickback => "vendor_kickback",
1254 AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1255 AnomalyCategory::DuplicatePayment => "duplicate_payment",
1256 AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1257 AnomalyCategory::StructuredTransaction => "structured_transaction",
1258 AnomalyCategory::CircularFlow => "circular_flow",
1259 AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1260 AnomalyCategory::TimingAnomaly => "timing_anomaly",
1261 AnomalyCategory::JournalAnomaly => "journal_anomaly",
1262 AnomalyCategory::ManualOverride => "manual_override",
1263 AnomalyCategory::MissingApproval => "missing_approval",
1264 AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1265 AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1266 AnomalyCategory::Custom(s) => s.as_str(),
1267 }
1268 }
1269
1270 pub fn ordinal(&self) -> u8 {
1272 match self {
1273 AnomalyCategory::FictitiousVendor => 0,
1274 AnomalyCategory::VendorKickback => 1,
1275 AnomalyCategory::RelatedPartyVendor => 2,
1276 AnomalyCategory::DuplicatePayment => 3,
1277 AnomalyCategory::UnauthorizedTransaction => 4,
1278 AnomalyCategory::StructuredTransaction => 5,
1279 AnomalyCategory::CircularFlow => 6,
1280 AnomalyCategory::BehavioralAnomaly => 7,
1281 AnomalyCategory::TimingAnomaly => 8,
1282 AnomalyCategory::JournalAnomaly => 9,
1283 AnomalyCategory::ManualOverride => 10,
1284 AnomalyCategory::MissingApproval => 11,
1285 AnomalyCategory::StatisticalOutlier => 12,
1286 AnomalyCategory::DistributionAnomaly => 13,
1287 AnomalyCategory::Custom(_) => 14,
1288 }
1289 }
1290
1291 pub fn category_count() -> usize {
1293 15 }
1295}
1296
/// Kind of factor that contributed to an anomaly assessment.
///
/// Used as `ContributingFactor::factor_type`. `FactorType::name` returns the
/// snake_case label of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Labelled `"amount_deviation"`.
    AmountDeviation,
    /// Labelled `"threshold_proximity"`.
    ThresholdProximity,
    /// Labelled `"timing_anomaly"`.
    TimingAnomaly,
    /// Labelled `"entity_risk"`.
    EntityRisk,
    /// Labelled `"pattern_match"`.
    PatternMatch,
    /// Labelled `"frequency_deviation"`.
    FrequencyDeviation,
    /// Labelled `"relationship_anomaly"`.
    RelationshipAnomaly,
    /// Labelled `"control_bypass"`.
    ControlBypass,
    /// Labelled `"benford_violation"`.
    BenfordViolation,
    /// Labelled `"duplicate_indicator"`.
    DuplicateIndicator,
    /// Labelled `"approval_chain_issue"`.
    ApprovalChainIssue,
    /// Labelled `"documentation_gap"`.
    DocumentationGap,
    /// Labelled `"custom"`.
    Custom,
}
1327
1328impl FactorType {
1329 pub fn name(&self) -> &'static str {
1331 match self {
1332 FactorType::AmountDeviation => "amount_deviation",
1333 FactorType::ThresholdProximity => "threshold_proximity",
1334 FactorType::TimingAnomaly => "timing_anomaly",
1335 FactorType::EntityRisk => "entity_risk",
1336 FactorType::PatternMatch => "pattern_match",
1337 FactorType::FrequencyDeviation => "frequency_deviation",
1338 FactorType::RelationshipAnomaly => "relationship_anomaly",
1339 FactorType::ControlBypass => "control_bypass",
1340 FactorType::BenfordViolation => "benford_violation",
1341 FactorType::DuplicateIndicator => "duplicate_indicator",
1342 FactorType::ApprovalChainIssue => "approval_chain_issue",
1343 FactorType::DocumentationGap => "documentation_gap",
1344 FactorType::Custom => "custom",
1345 }
1346 }
1347}
1348
/// Supporting evidence attached to a [`ContributingFactor`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Where the evidence came from (free-form string).
    pub source: String,
    /// Free-form key/value evidence data.
    pub data: HashMap<String, String>,
}
1357
/// One measured factor that contributes to an anomaly assessment.
///
/// `ContributingFactor::contribution` turns `value`, `threshold`,
/// `direction_greater` and `weight` into a single score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Kind of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold the value is compared against.
    pub threshold: f64,
    /// When `true`, only values above the threshold count as a deviation;
    /// when `false`, only values below it do.
    pub direction_greater: bool,
    /// Multiplier applied to the relative deviation.
    pub weight: f64,
    /// Human-readable description of the factor.
    pub description: String,
    /// Optional supporting evidence; `new` leaves it unset.
    pub evidence: Option<FactorEvidence>,
}
1376
1377impl ContributingFactor {
1378 pub fn new(
1380 factor_type: FactorType,
1381 value: f64,
1382 threshold: f64,
1383 direction_greater: bool,
1384 weight: f64,
1385 description: &str,
1386 ) -> Self {
1387 Self {
1388 factor_type,
1389 value,
1390 threshold,
1391 direction_greater,
1392 weight,
1393 description: description.to_string(),
1394 evidence: None,
1395 }
1396 }
1397
1398 pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1400 self.evidence = Some(FactorEvidence {
1401 source: source.to_string(),
1402 data,
1403 });
1404 self
1405 }
1406
1407 pub fn contribution(&self) -> f64 {
1409 let deviation = if self.direction_greater {
1410 (self.value - self.threshold).max(0.0)
1411 } else {
1412 (self.threshold - self.value).max(0.0)
1413 };
1414
1415 let relative_deviation = if self.threshold.abs() > 0.001 {
1417 deviation / self.threshold.abs()
1418 } else {
1419 deviation
1420 };
1421
1422 (relative_deviation * self.weight).min(1.0)
1424 }
1425}
1426
/// A [`LabeledAnomaly`] extended with a derived category, normalized scores
/// and the factors that contributed to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// The underlying label.
    pub base: LabeledAnomaly,
    /// Primary category; `from_base` derives it from `base.anomaly_type`.
    pub category: AnomalyCategory,
    /// Confidence; starts as `base.confidence`, and `with_confidence` clamps
    /// it to `[0.0, 1.0]`.
    pub enhanced_confidence: f64,
    /// Severity; starts as `base.severity / 5`, and `with_severity` clamps it
    /// to `[0.0, 1.0]`.
    pub enhanced_severity: f64,
    /// Factors that contributed to the assessment.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Additional categories; `with_secondary_category` keeps them distinct
    /// from each other and from `category`.
    pub secondary_categories: Vec<AnomalyCategory>,
}
1443
1444impl EnhancedAnomalyLabel {
1445 pub fn from_base(base: LabeledAnomaly) -> Self {
1447 let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1448 let enhanced_confidence = base.confidence;
1449 let enhanced_severity = base.severity as f64 / 5.0;
1450
1451 Self {
1452 base,
1453 category,
1454 enhanced_confidence,
1455 enhanced_severity,
1456 contributing_factors: Vec::new(),
1457 secondary_categories: Vec::new(),
1458 }
1459 }
1460
1461 pub fn with_confidence(mut self, confidence: f64) -> Self {
1463 self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1464 self
1465 }
1466
1467 pub fn with_severity(mut self, severity: f64) -> Self {
1469 self.enhanced_severity = severity.clamp(0.0, 1.0);
1470 self
1471 }
1472
1473 pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1475 self.contributing_factors.push(factor);
1476 self
1477 }
1478
1479 pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1481 if !self.secondary_categories.contains(&category) && category != self.category {
1482 self.secondary_categories.push(category);
1483 }
1484 self
1485 }
1486
1487 pub fn to_features(&self) -> Vec<f64> {
1491 let mut features = self.base.to_features();
1492
1493 features.push(self.enhanced_confidence);
1495 features.push(self.enhanced_severity);
1496 features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1497 features.push(self.secondary_categories.len() as f64);
1498 features.push(self.contributing_factors.len() as f64);
1499
1500 let max_weight = self
1502 .contributing_factors
1503 .iter()
1504 .map(|f| f.weight)
1505 .fold(0.0, f64::max);
1506 features.push(max_weight);
1507
1508 let has_control_bypass = self
1510 .contributing_factors
1511 .iter()
1512 .any(|f| f.factor_type == FactorType::ControlBypass);
1513 features.push(if has_control_bypass { 1.0 } else { 0.0 });
1514
1515 let has_amount_deviation = self
1516 .contributing_factors
1517 .iter()
1518 .any(|f| f.factor_type == FactorType::AmountDeviation);
1519 features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1520
1521 let has_timing = self
1522 .contributing_factors
1523 .iter()
1524 .any(|f| f.factor_type == FactorType::TimingAnomaly);
1525 features.push(if has_timing { 1.0 } else { 0.0 });
1526
1527 let has_pattern_match = self
1528 .contributing_factors
1529 .iter()
1530 .any(|f| f.factor_type == FactorType::PatternMatch);
1531 features.push(if has_pattern_match { 1.0 } else { 0.0 });
1532
1533 features
1534 }
1535
1536 pub fn feature_count() -> usize {
1538 25 }
1540
1541 pub fn feature_names() -> Vec<&'static str> {
1543 let mut names = LabeledAnomaly::feature_names();
1544 names.extend(vec![
1545 "enhanced_confidence",
1546 "enhanced_severity",
1547 "category_ordinal",
1548 "secondary_category_count",
1549 "contributing_factor_count",
1550 "max_factor_weight",
1551 "has_control_bypass",
1552 "has_amount_deviation",
1553 "has_timing_factor",
1554 "has_pattern_match",
1555 ]);
1556 names
1557 }
1558}
1559
/// Anomaly rate configuration: one overall rate plus one rate per anomaly category.
///
/// [`AnomalyRateConfig::validate`] checks that the five category rates sum to
/// 1.0 (within 0.01) and that `total_rate` lies in `[0.0, 1.0]`.
///
/// NOTE(review): because the category rates must sum to 1.0 they look like the
/// share of anomalies assigned to each category, with `total_rate` being the
/// overall anomaly rate — confirm against the code that consumes this config.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalyRateConfig {
    /// Overall anomaly rate; validation requires a value in `[0.0, 1.0]`. Default: 0.02.
    pub total_rate: f64,
    /// Rate for the fraud category; part of the category sum. Default: 0.25.
    pub fraud_rate: f64,
    /// Rate for the error category; part of the category sum. Default: 0.35.
    pub error_rate: f64,
    /// Rate for the process-issue category; part of the category sum. Default: 0.20.
    pub process_issue_rate: f64,
    /// Rate for the statistical category; part of the category sum. Default: 0.15.
    pub statistical_rate: f64,
    /// Rate for the relational category; part of the category sum. Default: 0.05.
    pub relational_rate: f64,
}
1576
1577impl Default for AnomalyRateConfig {
1578 fn default() -> Self {
1579 Self {
1580 total_rate: 0.02, fraud_rate: 0.25, error_rate: 0.35, process_issue_rate: 0.20, statistical_rate: 0.15, relational_rate: 0.05, }
1587 }
1588}
1589
1590impl AnomalyRateConfig {
1591 pub fn validate(&self) -> Result<(), String> {
1593 let sum = self.fraud_rate
1594 + self.error_rate
1595 + self.process_issue_rate
1596 + self.statistical_rate
1597 + self.relational_rate;
1598
1599 if (sum - 1.0).abs() > 0.01 {
1600 return Err(format!(
1601 "Anomaly category rates must sum to 1.0, got {}",
1602 sum
1603 ));
1604 }
1605
1606 if self.total_rate < 0.0 || self.total_rate > 1.0 {
1607 return Err(format!(
1608 "Total rate must be between 0.0 and 1.0, got {}",
1609 self.total_rate
1610 ));
1611 }
1612
1613 Ok(())
1614 }
1615}
1616
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;

    // ------------------------------------------------------------------
    // Shared fixtures
    // ------------------------------------------------------------------

    /// Shorthand for building a date; panics if the combination is not a valid date.
    fn ymd(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Builds a `LabeledAnomaly` with the fourth and fifth constructor arguments
    /// fixed to `"JE"` and `"1000"`, the values shared by most tests in this module.
    fn je_anomaly(
        anomaly_id: &str,
        anomaly_type: AnomalyType,
        document_id: &str,
        date: NaiveDate,
    ) -> LabeledAnomaly {
        LabeledAnomaly::new(
            anomaly_id.to_string(),
            anomaly_type,
            document_id.to_string(),
            "JE".to_string(),
            "1000".to_string(),
            date,
        )
    }

    /// The fixture most tests start from: anomaly "ANO001", self-approval fraud,
    /// document "JE001", dated 2024-01-15.
    fn self_approval_anomaly() -> LabeledAnomaly {
        je_anomaly(
            "ANO001",
            AnomalyType::Fraud(FraudType::SelfApproval),
            "JE001",
            ymd(2024, 1, 15),
        )
    }

    /// Wraps `fraud_type` in `AnomalyType::Fraud`, asserts that it reports the
    /// "Fraud" category and is intentional, and returns the wrapped value so the
    /// caller can add its own severity check.
    fn checked_fraud(fraud_type: FraudType) -> AnomalyType {
        let anomaly_type = AnomalyType::Fraud(fraud_type);
        assert_eq!(anomaly_type.category(), "Fraud");
        assert!(anomaly_type.is_intentional());
        anomaly_type
    }

    // ------------------------------------------------------------------
    // Anomaly types and labeled anomalies
    // ------------------------------------------------------------------

    #[test]
    fn test_anomaly_type_category() {
        // (anomaly type, expected category label, expected intentionality)
        let cases = [
            (AnomalyType::Fraud(FraudType::SelfApproval), "Fraud", true),
            (AnomalyType::Error(ErrorType::DuplicateEntry), "Error", false),
        ];

        for (anomaly_type, category, intentional) in cases {
            assert_eq!(anomaly_type.category(), category);
            assert_eq!(anomaly_type.is_intentional(), intentional);
        }
    }

    #[test]
    fn test_labeled_anomaly() {
        let labeled = self_approval_anomaly()
            .with_description("User approved their own expense report")
            .with_related_entity("USER001");

        assert_eq!(labeled.severity, 3);
        assert!(labeled.is_injected);
        assert_eq!(labeled.related_entities.len(), 1);
    }

    #[test]
    fn test_labeled_anomaly_with_provenance() {
        let labeled = self_approval_anomaly()
            .with_run_id("run-123")
            .with_generation_seed(42)
            .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
            .with_structured_strategy(InjectionStrategy::SelfApproval {
                user_id: "USER001".to_string(),
            })
            .with_scenario("scenario-001")
            .with_original_document_hash("abc123");

        assert_eq!(labeled.run_id.as_deref(), Some("run-123"));
        assert_eq!(labeled.generation_seed, Some(42));
        assert!(labeled.causal_reason.is_some());
        assert!(labeled.structured_strategy.is_some());
        assert_eq!(labeled.scenario_id.as_deref(), Some("scenario-001"));
        assert_eq!(labeled.original_document_hash.as_deref(), Some("abc123"));

        // Setting a structured strategy is expected to fill the string field as well.
        assert_eq!(labeled.injection_strategy.as_deref(), Some("SelfApproval"));
    }

    #[test]
    fn test_labeled_anomaly_derivation_chain() {
        let parent = je_anomaly(
            "ANO001",
            AnomalyType::Fraud(FraudType::DuplicatePayment),
            "JE001",
            ymd(2024, 1, 15),
        );

        let child = je_anomaly(
            "ANO002",
            AnomalyType::Error(ErrorType::DuplicateEntry),
            "JE002",
            ymd(2024, 1, 15),
        )
        .with_parent_anomaly(&parent.anomaly_id);

        assert_eq!(child.parent_anomaly_id.as_deref(), Some("ANO001"));
    }

    // ------------------------------------------------------------------
    // Injection strategies and causal reasons
    // ------------------------------------------------------------------

    #[test]
    fn test_injection_strategy_description() {
        let manipulation = InjectionStrategy::AmountManipulation {
            original: dec!(1000),
            factor: 2.5,
        };
        assert_eq!(manipulation.description(), "Amount multiplied by 2.50");
        assert_eq!(manipulation.strategy_type(), "AmountManipulation");

        // (strategy, expected description)
        let described = [
            (
                InjectionStrategy::ThresholdAvoidance {
                    threshold: dec!(10000),
                    adjusted_amount: dec!(9999),
                },
                "Amount adjusted to avoid 10000 threshold",
            ),
            (
                InjectionStrategy::DateShift {
                    days_shifted: -5,
                    original_date: ymd(2024, 1, 15),
                },
                "Date backdated by 5 days",
            ),
            (
                InjectionStrategy::DateShift {
                    days_shifted: 3,
                    original_date: ymd(2024, 1, 15),
                },
                "Date forward-dated by 3 days",
            ),
        ];

        for (strategy, expected) in described {
            assert_eq!(strategy.description(), expected);
        }
    }

    #[test]
    fn test_causal_reason_variants() {
        let random = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
        if let AnomalyCausalReason::RandomRate { base_rate } = random {
            assert!((base_rate - 0.02).abs() < 0.001);
        }

        let temporal = AnomalyCausalReason::TemporalPattern {
            pattern_name: "year_end_spike".to_string(),
        };
        if let AnomalyCausalReason::TemporalPattern { pattern_name } = temporal {
            assert_eq!(pattern_name, "year_end_spike");
        }

        let scenario = AnomalyCausalReason::ScenarioStep {
            scenario_type: "kickback".to_string(),
            step_number: 3,
        };
        if let AnomalyCausalReason::ScenarioStep {
            scenario_type,
            step_number,
        } = scenario
        {
            assert_eq!(scenario_type, "kickback");
            assert_eq!(step_number, 3);
        }
    }

    // ------------------------------------------------------------------
    // Base feature vector
    // ------------------------------------------------------------------

    #[test]
    fn test_feature_vector_length() {
        let features = self_approval_anomaly().to_features();

        assert_eq!(features.len(), LabeledAnomaly::feature_count());
        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
    }

    #[test]
    fn test_feature_vector_with_provenance() {
        let features = self_approval_anomaly()
            .with_scenario("scenario-001")
            .with_parent_anomaly("ANO000")
            .to_features();

        // The last two entries are expected to be 1.0 once both a scenario and
        // a parent anomaly are set.
        let count = features.len();
        assert_eq!(features[count - 2], 1.0);
        assert_eq!(features[count - 1], 1.0);
    }

    // ------------------------------------------------------------------
    // Summary and rate configuration
    // ------------------------------------------------------------------

    #[test]
    fn test_anomaly_summary() {
        let anomalies = vec![
            self_approval_anomaly(),
            je_anomaly(
                "ANO002",
                AnomalyType::Error(ErrorType::DuplicateEntry),
                "JE002",
                ymd(2024, 1, 16),
            ),
        ];

        let summary = AnomalySummary::from_anomalies(&anomalies);

        assert_eq!(summary.total_count, 2);
        for category in ["Fraud", "Error"] {
            assert_eq!(summary.by_category.get(category), Some(&1));
        }
    }

    #[test]
    fn test_rate_config_validation() {
        assert!(AnomalyRateConfig::default().validate().is_ok());

        // Three category rates of 0.5 push the category sum well above 1.0.
        let overweight = AnomalyRateConfig {
            fraud_rate: 0.5,
            error_rate: 0.5,
            process_issue_rate: 0.5,
            ..Default::default()
        };
        assert!(overweight.validate().is_err());
    }

    // ------------------------------------------------------------------
    // Serialization round trips
    // ------------------------------------------------------------------

    #[test]
    fn test_injection_strategy_serialization() {
        let original = InjectionStrategy::SoDViolation {
            duty1: "CreatePO".to_string(),
            duty2: "ApprovePO".to_string(),
            violating_user: "USER001".to_string(),
        };

        let json = serde_json::to_string(&original).unwrap();
        let restored: InjectionStrategy = serde_json::from_str(&json).unwrap();

        assert_eq!(original, restored);
    }

    #[test]
    fn test_labeled_anomaly_serialization_with_provenance() {
        let original = self_approval_anomaly()
            .with_run_id("run-123")
            .with_generation_seed(42)
            .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });

        let json = serde_json::to_string(&original).unwrap();
        let restored: LabeledAnomaly = serde_json::from_str(&json).unwrap();

        assert_eq!(original.run_id, restored.run_id);
        assert_eq!(original.generation_seed, restored.generation_seed);
    }

    // ------------------------------------------------------------------
    // Anomaly categories and contributing factors
    // ------------------------------------------------------------------

    #[test]
    fn test_anomaly_category_from_anomaly_type() {
        // (anomaly type, category it is expected to map to)
        let mappings = [
            (
                AnomalyType::Fraud(FraudType::FictitiousVendor),
                AnomalyCategory::FictitiousVendor,
            ),
            (
                AnomalyType::Fraud(FraudType::KickbackScheme),
                AnomalyCategory::VendorKickback,
            ),
            (
                AnomalyType::Fraud(FraudType::SplitTransaction),
                AnomalyCategory::StructuredTransaction,
            ),
            (
                AnomalyType::Error(ErrorType::DuplicateEntry),
                AnomalyCategory::DuplicatePayment,
            ),
            (
                AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
                AnomalyCategory::MissingApproval,
            ),
            (
                AnomalyType::Relational(RelationalAnomalyType::CircularTransaction),
                AnomalyCategory::CircularFlow,
            ),
        ];

        for (anomaly_type, expected) in mappings {
            assert_eq!(AnomalyCategory::from_anomaly_type(&anomaly_type), expected);
        }
    }

    #[test]
    fn test_anomaly_category_ordinal() {
        let first = AnomalyCategory::FictitiousVendor;
        let second = AnomalyCategory::VendorKickback;
        let custom = AnomalyCategory::Custom("test".to_string());

        assert_eq!(first.ordinal(), 0);
        assert_eq!(second.ordinal(), 1);
        assert_eq!(custom.ordinal(), 14);
    }

    #[test]
    fn test_contributing_factor() {
        let (value, threshold, weight) = (15000.0, 10000.0, 0.5);
        let factor = ContributingFactor::new(
            FactorType::AmountDeviation,
            value,
            threshold,
            true,
            weight,
            "Amount exceeds threshold",
        );

        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
        assert_eq!(factor.value, 15000.0);
        assert_eq!(factor.threshold, 10000.0);
        assert!(factor.direction_greater);

        // The contribution is expected to be 0.25, within a tolerance of 0.01.
        let deviation = (factor.contribution() - 0.25).abs();
        assert!(deviation < 0.01);
    }

    #[test]
    fn test_contributing_factor_with_evidence() {
        let data: HashMap<String, String> = [("expected", "10000"), ("actual", "15000")]
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();

        let factor = ContributingFactor::new(
            FactorType::AmountDeviation,
            15000.0,
            10000.0,
            true,
            0.5,
            "Amount deviation detected",
        )
        .with_evidence("transaction_history", data);

        assert!(factor.evidence.is_some());
        let evidence = factor.evidence.unwrap();
        assert_eq!(evidence.source, "transaction_history");
        assert_eq!(
            evidence.data.get("expected").map(String::as_str),
            Some("10000")
        );
    }

    // ------------------------------------------------------------------
    // Enhanced anomaly labels
    // ------------------------------------------------------------------

    #[test]
    fn test_enhanced_anomaly_label() {
        let base = je_anomaly(
            "ANO001",
            AnomalyType::Fraud(FraudType::DuplicatePayment),
            "JE001",
            ymd(2024, 1, 15),
        );
        let duplicate_factor = ContributingFactor::new(
            FactorType::DuplicateIndicator,
            1.0,
            0.5,
            true,
            0.4,
            "Duplicate payment detected",
        );

        let enhanced = EnhancedAnomalyLabel::from_base(base)
            .with_confidence(0.85)
            .with_severity(0.7)
            .with_factor(duplicate_factor)
            .with_secondary_category(AnomalyCategory::StructuredTransaction);

        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
        assert_eq!(enhanced.enhanced_confidence, 0.85);
        assert_eq!(enhanced.enhanced_severity, 0.7);
        assert_eq!(enhanced.contributing_factors.len(), 1);
        assert_eq!(enhanced.secondary_categories.len(), 1);
    }

    #[test]
    fn test_enhanced_anomaly_label_features() {
        let bypass_factor = ContributingFactor::new(
            FactorType::ControlBypass,
            1.0,
            0.0,
            true,
            0.5,
            "Control bypass detected",
        );

        let features = EnhancedAnomalyLabel::from_base(self_approval_anomaly())
            .with_confidence(0.9)
            .with_severity(0.8)
            .with_factor(bypass_factor)
            .to_features();

        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
        assert_eq!(features.len(), 25);

        // Index 15 is the first enhanced feature (confidence); index 21 is the
        // control-bypass flag.
        assert_eq!(features[15], 0.9);
        assert_eq!(features[21], 1.0);
    }

    #[test]
    fn test_enhanced_anomaly_label_feature_names() {
        let names = EnhancedAnomalyLabel::feature_names();

        assert_eq!(names.len(), 25);
        for expected in [
            "enhanced_confidence",
            "enhanced_severity",
            "has_control_bypass",
        ] {
            assert!(names.contains(&expected));
        }
    }

    #[test]
    fn test_factor_type_names() {
        // (factor type, expected name)
        let expectations = [
            (FactorType::AmountDeviation, "amount_deviation"),
            (FactorType::ThresholdProximity, "threshold_proximity"),
            (FactorType::ControlBypass, "control_bypass"),
        ];

        for (factor_type, expected) in expectations {
            assert_eq!(factor_type.name(), expected);
        }
    }

    #[test]
    fn test_anomaly_category_serialization() {
        let categories = [
            AnomalyCategory::CircularFlow,
            AnomalyCategory::Custom("custom_type".to_string()),
        ];

        for category in categories {
            let json = serde_json::to_string(&category).unwrap();
            let restored: AnomalyCategory = serde_json::from_str(&json).unwrap();
            assert_eq!(category, restored);
        }
    }

    #[test]
    fn test_enhanced_label_secondary_category_dedup() {
        let base = je_anomaly(
            "ANO001",
            AnomalyType::Fraud(FraudType::DuplicatePayment),
            "JE001",
            ymd(2024, 1, 15),
        );

        // The first call repeats the primary category and the third repeats the
        // second, so only one secondary category is expected to remain.
        let enhanced = EnhancedAnomalyLabel::from_base(base)
            .with_secondary_category(AnomalyCategory::DuplicatePayment)
            .with_secondary_category(AnomalyCategory::TimingAnomaly)
            .with_secondary_category(AnomalyCategory::TimingAnomaly);

        assert_eq!(enhanced.secondary_categories.len(), 1);
        assert_eq!(
            enhanced.secondary_categories[0],
            AnomalyCategory::TimingAnomaly
        );
    }

    // ------------------------------------------------------------------
    // Accounting-standards fraud types
    // ------------------------------------------------------------------

    #[test]
    fn test_revenue_recognition_fraud_types() {
        for fraud_type in [
            FraudType::ImproperRevenueRecognition,
            FraudType::ImproperPoAllocation,
            FraudType::VariableConsiderationManipulation,
            FraudType::ContractModificationMisstatement,
        ] {
            assert!(checked_fraud(fraud_type).severity() >= 3);
        }
    }

    #[test]
    fn test_lease_accounting_fraud_types() {
        for fraud_type in [
            FraudType::LeaseClassificationManipulation,
            FraudType::OffBalanceSheetLease,
            FraudType::LeaseLiabilityUnderstatement,
            FraudType::RouAssetMisstatement,
        ] {
            assert!(checked_fraud(fraud_type).severity() >= 3);
        }

        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
    }

    #[test]
    fn test_fair_value_fraud_types() {
        for fraud_type in [
            FraudType::FairValueHierarchyManipulation,
            FraudType::Level3InputManipulation,
            FraudType::ValuationTechniqueManipulation,
        ] {
            assert!(checked_fraud(fraud_type).severity() >= 4);
        }

        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
    }

    #[test]
    fn test_impairment_fraud_types() {
        for fraud_type in [
            FraudType::DelayedImpairment,
            FraudType::ImpairmentTestAvoidance,
            FraudType::CashFlowProjectionManipulation,
            FraudType::ImproperImpairmentReversal,
        ] {
            assert!(checked_fraud(fraud_type).severity() >= 3);
        }

        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
    }

    // ------------------------------------------------------------------
    // Accounting-standards error types
    // ------------------------------------------------------------------

    #[test]
    fn test_standards_error_types() {
        for error_type in [
            ErrorType::RevenueTimingError,
            ErrorType::PoAllocationError,
            ErrorType::LeaseClassificationError,
            ErrorType::LeaseCalculationError,
            ErrorType::FairValueError,
            ErrorType::ImpairmentCalculationError,
            ErrorType::DiscountRateError,
            ErrorType::FrameworkApplicationError,
        ] {
            let anomaly_type = AnomalyType::Error(error_type);
            assert_eq!(anomaly_type.category(), "Error");
            assert!(!anomaly_type.is_intentional());
            assert!(anomaly_type.severity() >= 3);
        }
    }

    #[test]
    fn test_framework_application_error() {
        let error_type = ErrorType::FrameworkApplicationError;
        assert_eq!(error_type.severity(), 4);

        let labeled = je_anomaly(
            "ERR001",
            AnomalyType::Error(error_type),
            "JE100",
            ymd(2024, 6, 30),
        )
        .with_description("LIFO inventory method used under IFRS (not permitted)")
        .with_metadata("framework", "IFRS")
        .with_metadata("standard_violated", "IAS 2");

        assert_eq!(labeled.anomaly_type.category(), "Error");
        assert_eq!(
            labeled
                .metadata
                .get("standard_violated")
                .map(String::as_str),
            Some("IAS 2")
        );
    }

    #[test]
    fn test_standards_anomaly_serialization() {
        for fraud_type in [
            FraudType::ImproperRevenueRecognition,
            FraudType::LeaseClassificationManipulation,
            FraudType::FairValueHierarchyManipulation,
            FraudType::DelayedImpairment,
        ] {
            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
            let restored: FraudType =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(fraud_type, restored);
        }

        for error_type in [
            ErrorType::RevenueTimingError,
            ErrorType::LeaseCalculationError,
            ErrorType::FairValueError,
            ErrorType::FrameworkApplicationError,
        ] {
            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
            let restored: ErrorType =
                serde_json::from_str(&json).expect("Failed to deserialize");
            assert_eq!(error_type, restored);
        }
    }

    #[test]
    fn test_standards_labeled_anomaly() {
        // Built directly rather than through `je_anomaly` because the fourth
        // constructor argument is "Revenue" here.
        let labeled = LabeledAnomaly::new(
            "STD001".to_string(),
            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
            "CONTRACT-2024-001".to_string(),
            "Revenue".to_string(),
            "1000".to_string(),
            ymd(2024, 12, 31),
        )
        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
        .with_monetary_impact(dec!(500000))
        .with_metadata("standard", "ASC 606")
        .with_metadata("paragraph", "606-10-25-1")
        .with_metadata("contract_id", "C-2024-001")
        .with_related_entity("CONTRACT-2024-001")
        .with_related_entity("CUSTOMER-500");

        assert_eq!(labeled.severity, 5);
        assert!(labeled.is_injected);
        assert_eq!(labeled.monetary_impact, Some(dec!(500000)));
        assert_eq!(labeled.related_entities.len(), 2);
        assert_eq!(
            labeled.metadata.get("standard").map(String::as_str),
            Some("ASC 606")
        );
    }
}