1use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
17pub enum AnomalyType {
18 Fraud(FraudType),
20 Error(ErrorType),
22 ProcessIssue(ProcessIssueType),
24 Statistical(StatisticalAnomalyType),
26 Relational(RelationalAnomalyType),
28 Custom(String),
30}
31
32impl AnomalyType {
33 pub fn category(&self) -> &'static str {
35 match self {
36 AnomalyType::Fraud(_) => "Fraud",
37 AnomalyType::Error(_) => "Error",
38 AnomalyType::ProcessIssue(_) => "ProcessIssue",
39 AnomalyType::Statistical(_) => "Statistical",
40 AnomalyType::Relational(_) => "Relational",
41 AnomalyType::Custom(_) => "Custom",
42 }
43 }
44
45 pub fn type_name(&self) -> String {
47 match self {
48 AnomalyType::Fraud(t) => format!("{:?}", t),
49 AnomalyType::Error(t) => format!("{:?}", t),
50 AnomalyType::ProcessIssue(t) => format!("{:?}", t),
51 AnomalyType::Statistical(t) => format!("{:?}", t),
52 AnomalyType::Relational(t) => format!("{:?}", t),
53 AnomalyType::Custom(s) => s.clone(),
54 }
55 }
56
57 pub fn severity(&self) -> u8 {
59 match self {
60 AnomalyType::Fraud(t) => t.severity(),
61 AnomalyType::Error(t) => t.severity(),
62 AnomalyType::ProcessIssue(t) => t.severity(),
63 AnomalyType::Statistical(t) => t.severity(),
64 AnomalyType::Relational(t) => t.severity(),
65 AnomalyType::Custom(_) => 3,
66 }
67 }
68
69 pub fn is_intentional(&self) -> bool {
71 matches!(self, AnomalyType::Fraud(_))
72 }
73}
74
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub enum FraudType {
78 FictitiousEntry,
81 FictitiousTransaction,
83 RoundDollarManipulation,
85 JustBelowThreshold,
87 RevenueManipulation,
89 ImproperCapitalization,
91 ExpenseCapitalization,
93 ReserveManipulation,
95 SuspenseAccountAbuse,
97 SplitTransaction,
99 TimingAnomaly,
101 UnauthorizedAccess,
103
104 SelfApproval,
107 ExceededApprovalLimit,
109 SegregationOfDutiesViolation,
111 UnauthorizedApproval,
113 CollusiveApproval,
115
116 FictitiousVendor,
119 DuplicatePayment,
121 ShellCompanyPayment,
123 Kickback,
125 KickbackScheme,
127 InvoiceManipulation,
129
130 AssetMisappropriation,
133 InventoryTheft,
135 GhostEmployee,
137
138 PrematureRevenue,
141 UnderstatedLiabilities,
143 OverstatedAssets,
145 ChannelStuffing,
147}
148
149impl FraudType {
150 pub fn severity(&self) -> u8 {
152 match self {
153 FraudType::RoundDollarManipulation => 2,
154 FraudType::JustBelowThreshold => 3,
155 FraudType::SelfApproval => 3,
156 FraudType::ExceededApprovalLimit => 3,
157 FraudType::DuplicatePayment => 3,
158 FraudType::FictitiousEntry => 4,
159 FraudType::RevenueManipulation => 5,
160 FraudType::FictitiousVendor => 5,
161 FraudType::ShellCompanyPayment => 5,
162 FraudType::AssetMisappropriation => 5,
163 FraudType::SegregationOfDutiesViolation => 4,
164 FraudType::CollusiveApproval => 5,
165 _ => 4,
166 }
167 }
168}
169
170#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
172pub enum ErrorType {
173 DuplicateEntry,
176 ReversedAmount,
178 TransposedDigits,
180 DecimalError,
182 MissingField,
184 InvalidAccount,
186
187 WrongPeriod,
190 BackdatedEntry,
192 FutureDatedEntry,
194 CutoffError,
196
197 MisclassifiedAccount,
200 WrongCostCenter,
202 WrongCompanyCode,
204
205 UnbalancedEntry,
208 RoundingError,
210 CurrencyError,
212 TaxCalculationError,
214}
215
216impl ErrorType {
217 pub fn severity(&self) -> u8 {
219 match self {
220 ErrorType::RoundingError => 1,
221 ErrorType::MissingField => 2,
222 ErrorType::TransposedDigits => 2,
223 ErrorType::DecimalError => 3,
224 ErrorType::DuplicateEntry => 3,
225 ErrorType::ReversedAmount => 3,
226 ErrorType::WrongPeriod => 4,
227 ErrorType::UnbalancedEntry => 5,
228 ErrorType::CurrencyError => 4,
229 _ => 3,
230 }
231 }
232}
233
234#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
236pub enum ProcessIssueType {
237 SkippedApproval,
240 LateApproval,
242 MissingDocumentation,
244 IncompleteApprovalChain,
246
247 LatePosting,
250 AfterHoursPosting,
252 WeekendPosting,
254 RushedPeriodEnd,
256
257 ManualOverride,
260 UnusualAccess,
262 SystemBypass,
264 BatchAnomaly,
266
267 VagueDescription,
270 PostFactoChange,
272 IncompleteAuditTrail,
274}
275
276impl ProcessIssueType {
277 pub fn severity(&self) -> u8 {
279 match self {
280 ProcessIssueType::VagueDescription => 1,
281 ProcessIssueType::LatePosting => 2,
282 ProcessIssueType::AfterHoursPosting => 2,
283 ProcessIssueType::WeekendPosting => 2,
284 ProcessIssueType::SkippedApproval => 4,
285 ProcessIssueType::ManualOverride => 4,
286 ProcessIssueType::SystemBypass => 5,
287 ProcessIssueType::IncompleteAuditTrail => 4,
288 _ => 3,
289 }
290 }
291}
292
293#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
295pub enum StatisticalAnomalyType {
296 UnusuallyHighAmount,
299 UnusuallyLowAmount,
301 BenfordViolation,
303 ExactDuplicateAmount,
305 RepeatingAmount,
307
308 UnusualFrequency,
311 TransactionBurst,
313 UnusualTiming,
315
316 TrendBreak,
319 LevelShift,
321 SeasonalAnomaly,
323
324 StatisticalOutlier,
327 VarianceChange,
329 DistributionShift,
331}
332
333impl StatisticalAnomalyType {
334 pub fn severity(&self) -> u8 {
336 match self {
337 StatisticalAnomalyType::UnusualTiming => 1,
338 StatisticalAnomalyType::UnusualFrequency => 2,
339 StatisticalAnomalyType::BenfordViolation => 2,
340 StatisticalAnomalyType::UnusuallyHighAmount => 3,
341 StatisticalAnomalyType::TrendBreak => 3,
342 StatisticalAnomalyType::TransactionBurst => 4,
343 StatisticalAnomalyType::ExactDuplicateAmount => 3,
344 _ => 3,
345 }
346 }
347}
348
349#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
351pub enum RelationalAnomalyType {
352 CircularTransaction,
355 UnusualAccountPair,
357 NewCounterparty,
359 DormantAccountActivity,
361
362 CentralityAnomaly,
365 IsolatedCluster,
367 BridgeNodeAnomaly,
369 CommunityAnomaly,
371
372 MissingRelationship,
375 UnexpectedRelationship,
377 RelationshipStrengthChange,
379
380 UnmatchedIntercompany,
383 CircularIntercompany,
385 TransferPricingAnomaly,
387}
388
389impl RelationalAnomalyType {
390 pub fn severity(&self) -> u8 {
392 match self {
393 RelationalAnomalyType::NewCounterparty => 1,
394 RelationalAnomalyType::DormantAccountActivity => 2,
395 RelationalAnomalyType::UnusualAccountPair => 2,
396 RelationalAnomalyType::CircularTransaction => 4,
397 RelationalAnomalyType::CircularIntercompany => 4,
398 RelationalAnomalyType::TransferPricingAnomaly => 4,
399 RelationalAnomalyType::UnmatchedIntercompany => 3,
400 _ => 3,
401 }
402 }
403}
404
405#[derive(Debug, Clone, Serialize, Deserialize)]
407pub struct LabeledAnomaly {
408 pub anomaly_id: String,
410 pub anomaly_type: AnomalyType,
412 pub document_id: String,
414 pub document_type: String,
416 pub company_code: String,
418 pub anomaly_date: NaiveDate,
420 pub detection_timestamp: NaiveDateTime,
422 pub confidence: f64,
424 pub severity: u8,
426 pub description: String,
428 pub related_entities: Vec<String>,
430 pub monetary_impact: Option<Decimal>,
432 pub metadata: HashMap<String, String>,
434 pub is_injected: bool,
436 pub injection_strategy: Option<String>,
438 pub cluster_id: Option<String>,
440}
441
442impl LabeledAnomaly {
443 pub fn new(
445 anomaly_id: String,
446 anomaly_type: AnomalyType,
447 document_id: String,
448 document_type: String,
449 company_code: String,
450 anomaly_date: NaiveDate,
451 ) -> Self {
452 let severity = anomaly_type.severity();
453 let description = format!(
454 "{} - {} in document {}",
455 anomaly_type.category(),
456 anomaly_type.type_name(),
457 document_id
458 );
459
460 Self {
461 anomaly_id,
462 anomaly_type,
463 document_id,
464 document_type,
465 company_code,
466 anomaly_date,
467 detection_timestamp: chrono::Local::now().naive_local(),
468 confidence: 1.0,
469 severity,
470 description,
471 related_entities: Vec::new(),
472 monetary_impact: None,
473 metadata: HashMap::new(),
474 is_injected: true,
475 injection_strategy: None,
476 cluster_id: None,
477 }
478 }
479
480 pub fn with_description(mut self, description: &str) -> Self {
482 self.description = description.to_string();
483 self
484 }
485
486 pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
488 self.monetary_impact = Some(impact);
489 self
490 }
491
492 pub fn with_related_entity(mut self, entity: &str) -> Self {
494 self.related_entities.push(entity.to_string());
495 self
496 }
497
498 pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
500 self.metadata.insert(key.to_string(), value.to_string());
501 self
502 }
503
504 pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
506 self.injection_strategy = Some(strategy.to_string());
507 self
508 }
509
510 pub fn with_cluster(mut self, cluster_id: &str) -> Self {
512 self.cluster_id = Some(cluster_id.to_string());
513 self
514 }
515
516 pub fn to_features(&self) -> Vec<f64> {
518 let mut features = Vec::new();
519
520 let categories = [
522 "Fraud",
523 "Error",
524 "ProcessIssue",
525 "Statistical",
526 "Relational",
527 "Custom",
528 ];
529 for cat in &categories {
530 features.push(if self.anomaly_type.category() == *cat {
531 1.0
532 } else {
533 0.0
534 });
535 }
536
537 features.push(self.severity as f64 / 5.0);
539
540 features.push(self.confidence);
542
543 features.push(if self.monetary_impact.is_some() {
545 1.0
546 } else {
547 0.0
548 });
549
550 if let Some(impact) = self.monetary_impact {
552 let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
553 features.push((impact_f64.abs() + 1.0).ln());
554 } else {
555 features.push(0.0);
556 }
557
558 features.push(if self.anomaly_type.is_intentional() {
560 1.0
561 } else {
562 0.0
563 });
564
565 features.push(self.related_entities.len() as f64);
567
568 features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
570
571 features
572 }
573}
574
575#[derive(Debug, Clone, Default, Serialize, Deserialize)]
577pub struct AnomalySummary {
578 pub total_count: usize,
580 pub by_category: HashMap<String, usize>,
582 pub by_type: HashMap<String, usize>,
584 pub by_severity: HashMap<u8, usize>,
586 pub by_company: HashMap<String, usize>,
588 pub total_monetary_impact: Decimal,
590 pub date_range: Option<(NaiveDate, NaiveDate)>,
592 pub cluster_count: usize,
594}
595
596impl AnomalySummary {
597 pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
599 let mut summary = AnomalySummary {
600 total_count: anomalies.len(),
601 ..Default::default()
602 };
603
604 let mut min_date: Option<NaiveDate> = None;
605 let mut max_date: Option<NaiveDate> = None;
606 let mut clusters = std::collections::HashSet::new();
607
608 for anomaly in anomalies {
609 *summary
611 .by_category
612 .entry(anomaly.anomaly_type.category().to_string())
613 .or_insert(0) += 1;
614
615 *summary
617 .by_type
618 .entry(anomaly.anomaly_type.type_name())
619 .or_insert(0) += 1;
620
621 *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
623
624 *summary
626 .by_company
627 .entry(anomaly.company_code.clone())
628 .or_insert(0) += 1;
629
630 if let Some(impact) = anomaly.monetary_impact {
632 summary.total_monetary_impact += impact;
633 }
634
635 match min_date {
637 None => min_date = Some(anomaly.anomaly_date),
638 Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
639 _ => {}
640 }
641 match max_date {
642 None => max_date = Some(anomaly.anomaly_date),
643 Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
644 _ => {}
645 }
646
647 if let Some(cluster_id) = &anomaly.cluster_id {
649 clusters.insert(cluster_id.clone());
650 }
651 }
652
653 summary.date_range = min_date.zip(max_date);
654 summary.cluster_count = clusters.len();
655
656 summary
657 }
658}
659
660#[derive(Debug, Clone, Serialize, Deserialize)]
662pub struct AnomalyRateConfig {
663 pub total_rate: f64,
665 pub fraud_rate: f64,
667 pub error_rate: f64,
669 pub process_issue_rate: f64,
671 pub statistical_rate: f64,
673 pub relational_rate: f64,
675}
676
677impl Default for AnomalyRateConfig {
678 fn default() -> Self {
679 Self {
680 total_rate: 0.02, fraud_rate: 0.25, error_rate: 0.35, process_issue_rate: 0.20, statistical_rate: 0.15, relational_rate: 0.05, }
687 }
688}
689
690impl AnomalyRateConfig {
691 pub fn validate(&self) -> Result<(), String> {
693 let sum = self.fraud_rate
694 + self.error_rate
695 + self.process_issue_rate
696 + self.statistical_rate
697 + self.relational_rate;
698
699 if (sum - 1.0).abs() > 0.01 {
700 return Err(format!(
701 "Anomaly category rates must sum to 1.0, got {}",
702 sum
703 ));
704 }
705
706 if self.total_rate < 0.0 || self.total_rate > 1.0 {
707 return Err(format!(
708 "Total rate must be between 0.0 and 1.0, got {}",
709 self.total_rate
710 ));
711 }
712
713 Ok(())
714 }
715}
716
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an anomaly for document type "JE" and company "1000", dated
    /// on the given day of January 2024.
    fn january_anomaly(id: &str, kind: AnomalyType, doc: &str, day: u32) -> LabeledAnomaly {
        LabeledAnomaly::new(
            id.to_string(),
            kind,
            doc.to_string(),
            "JE".to_string(),
            "1000".to_string(),
            NaiveDate::from_ymd_opt(2024, 1, day).unwrap(),
        )
    }

    /// Category names and the intentional flag follow the top-level variant.
    #[test]
    fn test_anomaly_type_category() {
        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
        assert_eq!(fraud.category(), "Fraud");
        assert!(fraud.is_intentional());

        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
        assert_eq!(error.category(), "Error");
        assert!(!error.is_intentional());
    }

    /// `new` derives severity and defaults; builders extend the label.
    #[test]
    fn test_labeled_anomaly() {
        let labeled = january_anomaly(
            "ANO001",
            AnomalyType::Fraud(FraudType::SelfApproval),
            "JE001",
            15,
        )
        .with_description("User approved their own expense report")
        .with_related_entity("USER001");

        assert_eq!(labeled.severity, 3);
        assert!(labeled.is_injected);
        assert_eq!(labeled.related_entities.len(), 1);
    }

    /// The summary counts anomalies in total and per category.
    #[test]
    fn test_anomaly_summary() {
        let batch = vec![
            january_anomaly(
                "ANO001",
                AnomalyType::Fraud(FraudType::SelfApproval),
                "JE001",
                15,
            ),
            january_anomaly(
                "ANO002",
                AnomalyType::Error(ErrorType::DuplicateEntry),
                "JE002",
                16,
            ),
        ];

        let summary = AnomalySummary::from_anomalies(&batch);

        assert_eq!(summary.total_count, 2);
        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
        assert_eq!(summary.by_category.get("Error"), Some(&1));
    }

    /// Defaults validate; category rates that do not sum to 1.0 are rejected.
    #[test]
    fn test_rate_config_validation() {
        assert!(AnomalyRateConfig::default().validate().is_ok());

        let oversubscribed = AnomalyRateConfig {
            fraud_rate: 0.5,
            error_rate: 0.5,
            process_issue_rate: 0.5,
            ..Default::default()
        };
        assert!(oversubscribed.validate().is_err());
    }
}