1use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20 RandomRate {
22 base_rate: f64,
24 },
25 TemporalPattern {
27 pattern_name: String,
29 },
30 EntityTargeting {
32 target_type: String,
34 target_id: String,
36 },
37 ClusterMembership {
39 cluster_id: String,
41 },
42 ScenarioStep {
44 scenario_type: String,
46 step_number: u32,
48 },
49 DataQualityProfile {
51 profile: String,
53 },
54 MLTrainingBalance {
56 target_class: String,
58 },
59}
60
61#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67 AmountManipulation {
69 original: Decimal,
71 factor: f64,
73 },
74 ThresholdAvoidance {
76 threshold: Decimal,
78 adjusted_amount: Decimal,
80 },
81 DateShift {
83 days_shifted: i32,
85 original_date: NaiveDate,
87 },
88 SelfApproval {
90 user_id: String,
92 },
93 SoDViolation {
95 duty1: String,
97 duty2: String,
99 violating_user: String,
101 },
102 ExactDuplicate {
104 original_doc_id: String,
106 },
107 NearDuplicate {
109 original_doc_id: String,
111 varied_fields: Vec<String>,
113 },
114 CircularFlow {
116 entity_chain: Vec<String>,
118 },
119 SplitTransaction {
121 original_amount: Decimal,
123 split_count: u32,
125 split_doc_ids: Vec<String>,
127 },
128 RoundNumbering {
130 original_amount: Decimal,
132 rounded_amount: Decimal,
134 },
135 TimingManipulation {
137 timing_type: String,
139 original_time: Option<NaiveDateTime>,
141 },
142 AccountMisclassification {
144 correct_account: String,
146 incorrect_account: String,
148 },
149 MissingField {
151 field_name: String,
153 },
154 Custom {
156 name: String,
158 parameters: HashMap<String, String>,
160 },
161}
162
163impl InjectionStrategy {
164 pub fn description(&self) -> String {
166 match self {
167 InjectionStrategy::AmountManipulation { factor, .. } => {
168 format!("Amount multiplied by {:.2}", factor)
169 }
170 InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171 format!("Amount adjusted to avoid {} threshold", threshold)
172 }
173 InjectionStrategy::DateShift { days_shifted, .. } => {
174 if *days_shifted < 0 {
175 format!("Date backdated by {} days", days_shifted.abs())
176 } else {
177 format!("Date forward-dated by {} days", days_shifted)
178 }
179 }
180 InjectionStrategy::SelfApproval { user_id } => {
181 format!("Self-approval by user {}", user_id)
182 }
183 InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184 format!("SoD violation: {} and {}", duty1, duty2)
185 }
186 InjectionStrategy::ExactDuplicate { original_doc_id } => {
187 format!("Exact duplicate of {}", original_doc_id)
188 }
189 InjectionStrategy::NearDuplicate {
190 original_doc_id,
191 varied_fields,
192 } => {
193 format!(
194 "Near-duplicate of {} (varied: {:?})",
195 original_doc_id, varied_fields
196 )
197 }
198 InjectionStrategy::CircularFlow { entity_chain } => {
199 format!("Circular flow through {} entities", entity_chain.len())
200 }
201 InjectionStrategy::SplitTransaction { split_count, .. } => {
202 format!("Split into {} transactions", split_count)
203 }
204 InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
205 InjectionStrategy::TimingManipulation { timing_type, .. } => {
206 format!("Timing manipulation: {}", timing_type)
207 }
208 InjectionStrategy::AccountMisclassification {
209 correct_account,
210 incorrect_account,
211 } => {
212 format!(
213 "Misclassified from {} to {}",
214 correct_account, incorrect_account
215 )
216 }
217 InjectionStrategy::MissingField { field_name } => {
218 format!("Missing required field: {}", field_name)
219 }
220 InjectionStrategy::Custom { name, .. } => format!("Custom: {}", name),
221 }
222 }
223
224 pub fn strategy_type(&self) -> &'static str {
226 match self {
227 InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
228 InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
229 InjectionStrategy::DateShift { .. } => "DateShift",
230 InjectionStrategy::SelfApproval { .. } => "SelfApproval",
231 InjectionStrategy::SoDViolation { .. } => "SoDViolation",
232 InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
233 InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
234 InjectionStrategy::CircularFlow { .. } => "CircularFlow",
235 InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
236 InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
237 InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
238 InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
239 InjectionStrategy::MissingField { .. } => "MissingField",
240 InjectionStrategy::Custom { .. } => "Custom",
241 }
242 }
243}
244
245#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
247pub enum AnomalyType {
248 Fraud(FraudType),
250 Error(ErrorType),
252 ProcessIssue(ProcessIssueType),
254 Statistical(StatisticalAnomalyType),
256 Relational(RelationalAnomalyType),
258 Custom(String),
260}
261
262impl AnomalyType {
263 pub fn category(&self) -> &'static str {
265 match self {
266 AnomalyType::Fraud(_) => "Fraud",
267 AnomalyType::Error(_) => "Error",
268 AnomalyType::ProcessIssue(_) => "ProcessIssue",
269 AnomalyType::Statistical(_) => "Statistical",
270 AnomalyType::Relational(_) => "Relational",
271 AnomalyType::Custom(_) => "Custom",
272 }
273 }
274
275 pub fn type_name(&self) -> String {
277 match self {
278 AnomalyType::Fraud(t) => format!("{:?}", t),
279 AnomalyType::Error(t) => format!("{:?}", t),
280 AnomalyType::ProcessIssue(t) => format!("{:?}", t),
281 AnomalyType::Statistical(t) => format!("{:?}", t),
282 AnomalyType::Relational(t) => format!("{:?}", t),
283 AnomalyType::Custom(s) => s.clone(),
284 }
285 }
286
287 pub fn severity(&self) -> u8 {
289 match self {
290 AnomalyType::Fraud(t) => t.severity(),
291 AnomalyType::Error(t) => t.severity(),
292 AnomalyType::ProcessIssue(t) => t.severity(),
293 AnomalyType::Statistical(t) => t.severity(),
294 AnomalyType::Relational(t) => t.severity(),
295 AnomalyType::Custom(_) => 3,
296 }
297 }
298
299 pub fn is_intentional(&self) -> bool {
301 matches!(self, AnomalyType::Fraud(_))
302 }
303}
304
305#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
307pub enum FraudType {
308 FictitiousEntry,
311 FictitiousTransaction,
313 RoundDollarManipulation,
315 JustBelowThreshold,
317 RevenueManipulation,
319 ImproperCapitalization,
321 ExpenseCapitalization,
323 ReserveManipulation,
325 SuspenseAccountAbuse,
327 SplitTransaction,
329 TimingAnomaly,
331 UnauthorizedAccess,
333
334 SelfApproval,
337 ExceededApprovalLimit,
339 SegregationOfDutiesViolation,
341 UnauthorizedApproval,
343 CollusiveApproval,
345
346 FictitiousVendor,
349 DuplicatePayment,
351 ShellCompanyPayment,
353 Kickback,
355 KickbackScheme,
357 InvoiceManipulation,
359
360 AssetMisappropriation,
363 InventoryTheft,
365 GhostEmployee,
367
368 PrematureRevenue,
371 UnderstatedLiabilities,
373 OverstatedAssets,
375 ChannelStuffing,
377}
378
379impl FraudType {
380 pub fn severity(&self) -> u8 {
382 match self {
383 FraudType::RoundDollarManipulation => 2,
384 FraudType::JustBelowThreshold => 3,
385 FraudType::SelfApproval => 3,
386 FraudType::ExceededApprovalLimit => 3,
387 FraudType::DuplicatePayment => 3,
388 FraudType::FictitiousEntry => 4,
389 FraudType::RevenueManipulation => 5,
390 FraudType::FictitiousVendor => 5,
391 FraudType::ShellCompanyPayment => 5,
392 FraudType::AssetMisappropriation => 5,
393 FraudType::SegregationOfDutiesViolation => 4,
394 FraudType::CollusiveApproval => 5,
395 _ => 4,
396 }
397 }
398}
399
400#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
402pub enum ErrorType {
403 DuplicateEntry,
406 ReversedAmount,
408 TransposedDigits,
410 DecimalError,
412 MissingField,
414 InvalidAccount,
416
417 WrongPeriod,
420 BackdatedEntry,
422 FutureDatedEntry,
424 CutoffError,
426
427 MisclassifiedAccount,
430 WrongCostCenter,
432 WrongCompanyCode,
434
435 UnbalancedEntry,
438 RoundingError,
440 CurrencyError,
442 TaxCalculationError,
444}
445
446impl ErrorType {
447 pub fn severity(&self) -> u8 {
449 match self {
450 ErrorType::RoundingError => 1,
451 ErrorType::MissingField => 2,
452 ErrorType::TransposedDigits => 2,
453 ErrorType::DecimalError => 3,
454 ErrorType::DuplicateEntry => 3,
455 ErrorType::ReversedAmount => 3,
456 ErrorType::WrongPeriod => 4,
457 ErrorType::UnbalancedEntry => 5,
458 ErrorType::CurrencyError => 4,
459 _ => 3,
460 }
461 }
462}
463
464#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
466pub enum ProcessIssueType {
467 SkippedApproval,
470 LateApproval,
472 MissingDocumentation,
474 IncompleteApprovalChain,
476
477 LatePosting,
480 AfterHoursPosting,
482 WeekendPosting,
484 RushedPeriodEnd,
486
487 ManualOverride,
490 UnusualAccess,
492 SystemBypass,
494 BatchAnomaly,
496
497 VagueDescription,
500 PostFactoChange,
502 IncompleteAuditTrail,
504}
505
506impl ProcessIssueType {
507 pub fn severity(&self) -> u8 {
509 match self {
510 ProcessIssueType::VagueDescription => 1,
511 ProcessIssueType::LatePosting => 2,
512 ProcessIssueType::AfterHoursPosting => 2,
513 ProcessIssueType::WeekendPosting => 2,
514 ProcessIssueType::SkippedApproval => 4,
515 ProcessIssueType::ManualOverride => 4,
516 ProcessIssueType::SystemBypass => 5,
517 ProcessIssueType::IncompleteAuditTrail => 4,
518 _ => 3,
519 }
520 }
521}
522
523#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
525pub enum StatisticalAnomalyType {
526 UnusuallyHighAmount,
529 UnusuallyLowAmount,
531 BenfordViolation,
533 ExactDuplicateAmount,
535 RepeatingAmount,
537
538 UnusualFrequency,
541 TransactionBurst,
543 UnusualTiming,
545
546 TrendBreak,
549 LevelShift,
551 SeasonalAnomaly,
553
554 StatisticalOutlier,
557 VarianceChange,
559 DistributionShift,
561}
562
563impl StatisticalAnomalyType {
564 pub fn severity(&self) -> u8 {
566 match self {
567 StatisticalAnomalyType::UnusualTiming => 1,
568 StatisticalAnomalyType::UnusualFrequency => 2,
569 StatisticalAnomalyType::BenfordViolation => 2,
570 StatisticalAnomalyType::UnusuallyHighAmount => 3,
571 StatisticalAnomalyType::TrendBreak => 3,
572 StatisticalAnomalyType::TransactionBurst => 4,
573 StatisticalAnomalyType::ExactDuplicateAmount => 3,
574 _ => 3,
575 }
576 }
577}
578
579#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
581pub enum RelationalAnomalyType {
582 CircularTransaction,
585 UnusualAccountPair,
587 NewCounterparty,
589 DormantAccountActivity,
591
592 CentralityAnomaly,
595 IsolatedCluster,
597 BridgeNodeAnomaly,
599 CommunityAnomaly,
601
602 MissingRelationship,
605 UnexpectedRelationship,
607 RelationshipStrengthChange,
609
610 UnmatchedIntercompany,
613 CircularIntercompany,
615 TransferPricingAnomaly,
617}
618
619impl RelationalAnomalyType {
620 pub fn severity(&self) -> u8 {
622 match self {
623 RelationalAnomalyType::NewCounterparty => 1,
624 RelationalAnomalyType::DormantAccountActivity => 2,
625 RelationalAnomalyType::UnusualAccountPair => 2,
626 RelationalAnomalyType::CircularTransaction => 4,
627 RelationalAnomalyType::CircularIntercompany => 4,
628 RelationalAnomalyType::TransferPricingAnomaly => 4,
629 RelationalAnomalyType::UnmatchedIntercompany => 3,
630 _ => 3,
631 }
632 }
633}
634
635#[derive(Debug, Clone, Serialize, Deserialize)]
637pub struct LabeledAnomaly {
638 pub anomaly_id: String,
640 pub anomaly_type: AnomalyType,
642 pub document_id: String,
644 pub document_type: String,
646 pub company_code: String,
648 pub anomaly_date: NaiveDate,
650 pub detection_timestamp: NaiveDateTime,
652 pub confidence: f64,
654 pub severity: u8,
656 pub description: String,
658 pub related_entities: Vec<String>,
660 pub monetary_impact: Option<Decimal>,
662 pub metadata: HashMap<String, String>,
664 pub is_injected: bool,
666 pub injection_strategy: Option<String>,
668 pub cluster_id: Option<String>,
670
671 #[serde(default, skip_serializing_if = "Option::is_none")]
677 pub original_document_hash: Option<String>,
678
679 #[serde(default, skip_serializing_if = "Option::is_none")]
682 pub causal_reason: Option<AnomalyCausalReason>,
683
684 #[serde(default, skip_serializing_if = "Option::is_none")]
687 pub structured_strategy: Option<InjectionStrategy>,
688
689 #[serde(default, skip_serializing_if = "Option::is_none")]
692 pub parent_anomaly_id: Option<String>,
693
694 #[serde(default, skip_serializing_if = "Vec::is_empty")]
696 pub child_anomaly_ids: Vec<String>,
697
698 #[serde(default, skip_serializing_if = "Option::is_none")]
700 pub scenario_id: Option<String>,
701
702 #[serde(default, skip_serializing_if = "Option::is_none")]
705 pub run_id: Option<String>,
706
707 #[serde(default, skip_serializing_if = "Option::is_none")]
710 pub generation_seed: Option<u64>,
711}
712
713impl LabeledAnomaly {
714 pub fn new(
716 anomaly_id: String,
717 anomaly_type: AnomalyType,
718 document_id: String,
719 document_type: String,
720 company_code: String,
721 anomaly_date: NaiveDate,
722 ) -> Self {
723 let severity = anomaly_type.severity();
724 let description = format!(
725 "{} - {} in document {}",
726 anomaly_type.category(),
727 anomaly_type.type_name(),
728 document_id
729 );
730
731 Self {
732 anomaly_id,
733 anomaly_type,
734 document_id,
735 document_type,
736 company_code,
737 anomaly_date,
738 detection_timestamp: chrono::Local::now().naive_local(),
739 confidence: 1.0,
740 severity,
741 description,
742 related_entities: Vec::new(),
743 monetary_impact: None,
744 metadata: HashMap::new(),
745 is_injected: true,
746 injection_strategy: None,
747 cluster_id: None,
748 original_document_hash: None,
750 causal_reason: None,
751 structured_strategy: None,
752 parent_anomaly_id: None,
753 child_anomaly_ids: Vec::new(),
754 scenario_id: None,
755 run_id: None,
756 generation_seed: None,
757 }
758 }
759
760 pub fn with_description(mut self, description: &str) -> Self {
762 self.description = description.to_string();
763 self
764 }
765
766 pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
768 self.monetary_impact = Some(impact);
769 self
770 }
771
772 pub fn with_related_entity(mut self, entity: &str) -> Self {
774 self.related_entities.push(entity.to_string());
775 self
776 }
777
778 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
780 self.metadata.insert(key.to_string(), value.to_string());
781 self
782 }
783
784 pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
786 self.injection_strategy = Some(strategy.to_string());
787 self
788 }
789
790 pub fn with_cluster(mut self, cluster_id: &str) -> Self {
792 self.cluster_id = Some(cluster_id.to_string());
793 self
794 }
795
796 pub fn with_original_document_hash(mut self, hash: &str) -> Self {
802 self.original_document_hash = Some(hash.to_string());
803 self
804 }
805
806 pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
808 self.causal_reason = Some(reason);
809 self
810 }
811
812 pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
814 self.injection_strategy = Some(strategy.strategy_type().to_string());
816 self.structured_strategy = Some(strategy);
817 self
818 }
819
820 pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
822 self.parent_anomaly_id = Some(parent_id.to_string());
823 self
824 }
825
826 pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
828 self.child_anomaly_ids.push(child_id.to_string());
829 self
830 }
831
832 pub fn with_scenario(mut self, scenario_id: &str) -> Self {
834 self.scenario_id = Some(scenario_id.to_string());
835 self
836 }
837
838 pub fn with_run_id(mut self, run_id: &str) -> Self {
840 self.run_id = Some(run_id.to_string());
841 self
842 }
843
844 pub fn with_generation_seed(mut self, seed: u64) -> Self {
846 self.generation_seed = Some(seed);
847 self
848 }
849
850 pub fn with_provenance(
852 mut self,
853 run_id: Option<&str>,
854 seed: Option<u64>,
855 causal_reason: Option<AnomalyCausalReason>,
856 ) -> Self {
857 if let Some(id) = run_id {
858 self.run_id = Some(id.to_string());
859 }
860 self.generation_seed = seed;
861 self.causal_reason = causal_reason;
862 self
863 }
864
865 pub fn to_features(&self) -> Vec<f64> {
879 let mut features = Vec::new();
880
881 let categories = [
883 "Fraud",
884 "Error",
885 "ProcessIssue",
886 "Statistical",
887 "Relational",
888 "Custom",
889 ];
890 for cat in &categories {
891 features.push(if self.anomaly_type.category() == *cat {
892 1.0
893 } else {
894 0.0
895 });
896 }
897
898 features.push(self.severity as f64 / 5.0);
900
901 features.push(self.confidence);
903
904 features.push(if self.monetary_impact.is_some() {
906 1.0
907 } else {
908 0.0
909 });
910
911 if let Some(impact) = self.monetary_impact {
913 let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
914 features.push((impact_f64.abs() + 1.0).ln());
915 } else {
916 features.push(0.0);
917 }
918
919 features.push(if self.anomaly_type.is_intentional() {
921 1.0
922 } else {
923 0.0
924 });
925
926 features.push(self.related_entities.len() as f64);
928
929 features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
931
932 features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
935
936 features.push(if self.parent_anomaly_id.is_some() {
938 1.0
939 } else {
940 0.0
941 });
942
943 features
944 }
945
946 pub fn feature_count() -> usize {
948 15 }
950
951 pub fn feature_names() -> Vec<&'static str> {
953 vec![
954 "category_fraud",
955 "category_error",
956 "category_process_issue",
957 "category_statistical",
958 "category_relational",
959 "category_custom",
960 "severity_normalized",
961 "confidence",
962 "has_monetary_impact",
963 "monetary_impact_log",
964 "is_intentional",
965 "related_entity_count",
966 "is_clustered",
967 "is_scenario_part",
968 "is_derived",
969 ]
970 }
971}
972
973#[derive(Debug, Clone, Default, Serialize, Deserialize)]
975pub struct AnomalySummary {
976 pub total_count: usize,
978 pub by_category: HashMap<String, usize>,
980 pub by_type: HashMap<String, usize>,
982 pub by_severity: HashMap<u8, usize>,
984 pub by_company: HashMap<String, usize>,
986 pub total_monetary_impact: Decimal,
988 pub date_range: Option<(NaiveDate, NaiveDate)>,
990 pub cluster_count: usize,
992}
993
994impl AnomalySummary {
995 pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
997 let mut summary = AnomalySummary {
998 total_count: anomalies.len(),
999 ..Default::default()
1000 };
1001
1002 let mut min_date: Option<NaiveDate> = None;
1003 let mut max_date: Option<NaiveDate> = None;
1004 let mut clusters = std::collections::HashSet::new();
1005
1006 for anomaly in anomalies {
1007 *summary
1009 .by_category
1010 .entry(anomaly.anomaly_type.category().to_string())
1011 .or_insert(0) += 1;
1012
1013 *summary
1015 .by_type
1016 .entry(anomaly.anomaly_type.type_name())
1017 .or_insert(0) += 1;
1018
1019 *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1021
1022 *summary
1024 .by_company
1025 .entry(anomaly.company_code.clone())
1026 .or_insert(0) += 1;
1027
1028 if let Some(impact) = anomaly.monetary_impact {
1030 summary.total_monetary_impact += impact;
1031 }
1032
1033 match min_date {
1035 None => min_date = Some(anomaly.anomaly_date),
1036 Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1037 _ => {}
1038 }
1039 match max_date {
1040 None => max_date = Some(anomaly.anomaly_date),
1041 Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1042 _ => {}
1043 }
1044
1045 if let Some(cluster_id) = &anomaly.cluster_id {
1047 clusters.insert(cluster_id.clone());
1048 }
1049 }
1050
1051 summary.date_range = min_date.zip(max_date);
1052 summary.cluster_count = clusters.len();
1053
1054 summary
1055 }
1056}
1057
1058#[derive(Debug, Clone, Serialize, Deserialize)]
1060pub struct AnomalyRateConfig {
1061 pub total_rate: f64,
1063 pub fraud_rate: f64,
1065 pub error_rate: f64,
1067 pub process_issue_rate: f64,
1069 pub statistical_rate: f64,
1071 pub relational_rate: f64,
1073}
1074
1075impl Default for AnomalyRateConfig {
1076 fn default() -> Self {
1077 Self {
1078 total_rate: 0.02, fraud_rate: 0.25, error_rate: 0.35, process_issue_rate: 0.20, statistical_rate: 0.15, relational_rate: 0.05, }
1085 }
1086}
1087
1088impl AnomalyRateConfig {
1089 pub fn validate(&self) -> Result<(), String> {
1091 let sum = self.fraud_rate
1092 + self.error_rate
1093 + self.process_issue_rate
1094 + self.statistical_rate
1095 + self.relational_rate;
1096
1097 if (sum - 1.0).abs() > 0.01 {
1098 return Err(format!(
1099 "Anomaly category rates must sum to 1.0, got {}",
1100 sum
1101 ));
1102 }
1103
1104 if self.total_rate < 0.0 || self.total_rate > 1.0 {
1105 return Err(format!(
1106 "Total rate must be between 0.0 and 1.0, got {}",
1107 self.total_rate
1108 ));
1109 }
1110
1111 Ok(())
1112 }
1113}
1114
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;

    /// Returns the given day of January 2024.
    fn jan_2024(day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, day).expect("valid January 2024 date")
    }

    /// Builds a journal-entry (`"JE"`) anomaly for company `"1000"`.
    fn sample(
        anomaly_id: &str,
        anomaly_type: AnomalyType,
        document_id: &str,
        day: u32,
    ) -> LabeledAnomaly {
        LabeledAnomaly::new(
            anomaly_id.to_string(),
            anomaly_type,
            document_id.to_string(),
            "JE".to_string(),
            "1000".to_string(),
            jan_2024(day),
        )
    }

    /// Builds the self-approval fraud anomaly most tests start from.
    fn self_approval() -> LabeledAnomaly {
        sample(
            "ANO001",
            AnomalyType::Fraud(FraudType::SelfApproval),
            "JE001",
            15,
        )
    }

    #[test]
    fn test_anomaly_type_category() {
        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
        assert_eq!(fraud.category(), "Fraud");
        assert!(fraud.is_intentional());

        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
        assert_eq!(error.category(), "Error");
        assert!(!error.is_intentional());
    }

    #[test]
    fn test_labeled_anomaly() {
        let anomaly = self_approval()
            .with_description("User approved their own expense report")
            .with_related_entity("USER001");

        assert_eq!(anomaly.severity, 3);
        assert!(anomaly.is_injected);
        assert_eq!(anomaly.related_entities.len(), 1);
        assert_eq!(anomaly.description, "User approved their own expense report");
    }

    #[test]
    fn test_labeled_anomaly_with_provenance() {
        let anomaly = self_approval()
            .with_run_id("run-123")
            .with_generation_seed(42)
            .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
            .with_structured_strategy(InjectionStrategy::SelfApproval {
                user_id: "USER001".to_string(),
            })
            .with_scenario("scenario-001")
            .with_original_document_hash("abc123");

        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
        assert_eq!(anomaly.generation_seed, Some(42));
        assert!(anomaly.causal_reason.is_some());
        assert!(anomaly.structured_strategy.is_some());
        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));

        // The structured strategy also fills in the plain strategy name.
        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
    }

    #[test]
    fn test_labeled_anomaly_derivation_chain() {
        let parent = sample(
            "ANO001",
            AnomalyType::Fraud(FraudType::DuplicatePayment),
            "JE001",
            15,
        );

        let child = sample(
            "ANO002",
            AnomalyType::Error(ErrorType::DuplicateEntry),
            "JE002",
            15,
        )
        .with_parent_anomaly(&parent.anomaly_id);

        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
    }

    #[test]
    fn test_injection_strategy_description() {
        let strategy = InjectionStrategy::AmountManipulation {
            original: dec!(1000),
            factor: 2.5,
        };
        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
        assert_eq!(strategy.strategy_type(), "AmountManipulation");

        let strategy = InjectionStrategy::ThresholdAvoidance {
            threshold: dec!(10000),
            adjusted_amount: dec!(9999),
        };
        assert_eq!(
            strategy.description(),
            "Amount adjusted to avoid 10000 threshold"
        );

        let strategy = InjectionStrategy::DateShift {
            days_shifted: -5,
            original_date: jan_2024(15),
        };
        assert_eq!(strategy.description(), "Date backdated by 5 days");

        let strategy = InjectionStrategy::DateShift {
            days_shifted: 3,
            original_date: jan_2024(15),
        };
        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
    }

    #[test]
    fn test_causal_reason_variants() {
        // Each match panics on an unexpected variant, so the assertions can
        // never be skipped silently (as they could be with a bare `if let`).
        match (AnomalyCausalReason::RandomRate { base_rate: 0.02 }) {
            AnomalyCausalReason::RandomRate { base_rate } => {
                assert!((base_rate - 0.02).abs() < 0.001);
            }
            other => panic!("expected RandomRate, got {:?}", other),
        }

        let reason = AnomalyCausalReason::TemporalPattern {
            pattern_name: "year_end_spike".to_string(),
        };
        match reason {
            AnomalyCausalReason::TemporalPattern { pattern_name } => {
                assert_eq!(pattern_name, "year_end_spike");
            }
            other => panic!("expected TemporalPattern, got {:?}", other),
        }

        let reason = AnomalyCausalReason::ScenarioStep {
            scenario_type: "kickback".to_string(),
            step_number: 3,
        };
        match reason {
            AnomalyCausalReason::ScenarioStep {
                scenario_type,
                step_number,
            } => {
                assert_eq!(scenario_type, "kickback");
                assert_eq!(step_number, 3);
            }
            other => panic!("expected ScenarioStep, got {:?}", other),
        }
    }

    #[test]
    fn test_feature_vector_length() {
        let features = self_approval().to_features();

        assert_eq!(features.len(), LabeledAnomaly::feature_count());
        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
    }

    #[test]
    fn test_feature_vector_with_provenance() {
        let anomaly = self_approval()
            .with_scenario("scenario-001")
            .with_parent_anomaly("ANO000");

        let features = anomaly.to_features();
        let last = features.len() - 1;

        // The final two features are `is_scenario_part` and `is_derived`.
        assert_eq!(features[last - 1], 1.0);
        assert_eq!(features[last], 1.0);
    }

    #[test]
    fn test_anomaly_summary() {
        let anomalies = vec![
            self_approval(),
            sample(
                "ANO002",
                AnomalyType::Error(ErrorType::DuplicateEntry),
                "JE002",
                16,
            ),
        ];

        let summary = AnomalySummary::from_anomalies(&anomalies);

        assert_eq!(summary.total_count, 2);
        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
        assert_eq!(summary.by_category.get("Error"), Some(&1));
        assert_eq!(summary.by_company.get("1000"), Some(&2));
        assert_eq!(summary.by_severity.get(&3), Some(&2));
        assert_eq!(summary.date_range, Some((jan_2024(15), jan_2024(16))));
        assert_eq!(summary.cluster_count, 0);
    }

    #[test]
    fn test_rate_config_validation() {
        let config = AnomalyRateConfig::default();
        assert!(config.validate().is_ok());

        // Category rates sum to 1.7, well outside the accepted tolerance.
        let bad_config = AnomalyRateConfig {
            fraud_rate: 0.5,
            error_rate: 0.5,
            process_issue_rate: 0.5,
            ..Default::default()
        };
        assert!(bad_config.validate().is_err());
    }

    #[test]
    fn test_injection_strategy_serialization() {
        let strategy = InjectionStrategy::SoDViolation {
            duty1: "CreatePO".to_string(),
            duty2: "ApprovePO".to_string(),
            violating_user: "USER001".to_string(),
        };

        let json = serde_json::to_string(&strategy).unwrap();
        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();

        assert_eq!(strategy, deserialized);
    }

    #[test]
    fn test_labeled_anomaly_serialization_with_provenance() {
        let anomaly = self_approval()
            .with_run_id("run-123")
            .with_generation_seed(42)
            .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });

        let json = serde_json::to_string(&anomaly).unwrap();
        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();

        assert_eq!(anomaly.run_id, deserialized.run_id);
        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
    }
}