1use datasynth_core::models::{
8 AnomalyDetectionDifficulty, AnomalyType, ConcealmentTechnique, DetectionMethod, FraudType,
9 LabeledAnomaly,
10};
11use serde::{Deserialize, Serialize};
12
/// Flags and techniques describing how an anomaly was concealed.
///
/// Populated through the chained `with_*` builders; `difficulty_contribution`
/// converts the flags and techniques into a score capped at 0.50.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ConcealmentFactors {
    /// Adds 0.20 to the concealment contribution when set.
    pub document_manipulation: bool,
    /// Adds 0.15 to the concealment contribution when set.
    pub approval_circumvention: bool,
    /// Adds 0.10 to the concealment contribution when set.
    pub timing_exploitation: bool,
    /// Adds 0.15 to the concealment contribution when set.
    pub splitting: bool,
    /// Techniques whose `difficulty_bonus()` is added to the contribution.
    /// `with_technique` skips values that are already present.
    pub techniques: Vec<ConcealmentTechnique>,
}
27
28impl ConcealmentFactors {
29 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_technique(mut self, technique: ConcealmentTechnique) -> Self {
36 if !self.techniques.contains(&technique) {
37 self.techniques.push(technique);
38 }
39 self
40 }
41
42 pub fn with_document_manipulation(mut self) -> Self {
44 self.document_manipulation = true;
45 self
46 }
47
48 pub fn with_approval_circumvention(mut self) -> Self {
50 self.approval_circumvention = true;
51 self
52 }
53
54 pub fn with_timing_exploitation(mut self) -> Self {
56 self.timing_exploitation = true;
57 self
58 }
59
60 pub fn with_splitting(mut self) -> Self {
62 self.splitting = true;
63 self
64 }
65
66 pub fn difficulty_contribution(&self) -> f64 {
68 let mut contribution = 0.0;
69
70 if self.document_manipulation {
71 contribution += 0.20;
72 }
73 if self.approval_circumvention {
74 contribution += 0.15;
75 }
76 if self.timing_exploitation {
77 contribution += 0.10;
78 }
79 if self.splitting {
80 contribution += 0.15;
81 }
82
83 for technique in &self.techniques {
85 contribution += technique.difficulty_bonus();
86 }
87
88 contribution.min(0.50)
90 }
91}
92
/// Flags describing how well an anomaly blends in with normal activity.
///
/// `difficulty_contribution` sums the weight of each set flag and caps the
/// result at 0.40.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BlendingFactors {
    /// Adds 0.15 to the blending contribution when set.
    pub amount_within_normal_range: bool,
    /// Adds 0.10 to the blending contribution when set.
    pub timing_within_normal_hours: bool,
    /// Adds 0.10 to the blending contribution when set.
    pub counterparty_is_established: bool,
    /// Adds 0.10 to the blending contribution when set.
    pub account_coding_correct: bool,
    /// Adds 0.08 to the blending contribution when set.
    pub description_matches_pattern: bool,
    /// Adds 0.07 to the blending contribution when set.
    pub frequency_is_normal: bool,
}
109
110impl BlendingFactors {
111 pub fn new() -> Self {
113 Self::default()
114 }
115
116 pub fn with_normal_amount(mut self) -> Self {
118 self.amount_within_normal_range = true;
119 self
120 }
121
122 pub fn with_normal_timing(mut self) -> Self {
124 self.timing_within_normal_hours = true;
125 self
126 }
127
128 pub fn with_established_counterparty(mut self) -> Self {
130 self.counterparty_is_established = true;
131 self
132 }
133
134 pub fn with_correct_coding(mut self) -> Self {
136 self.account_coding_correct = true;
137 self
138 }
139
140 pub fn with_normal_description(mut self) -> Self {
142 self.description_matches_pattern = true;
143 self
144 }
145
146 pub fn with_normal_frequency(mut self) -> Self {
148 self.frequency_is_normal = true;
149 self
150 }
151
152 pub fn difficulty_contribution(&self) -> f64 {
154 let mut contribution: f64 = 0.0;
155
156 if self.amount_within_normal_range {
157 contribution += 0.15;
158 }
159 if self.timing_within_normal_hours {
160 contribution += 0.10;
161 }
162 if self.counterparty_is_established {
163 contribution += 0.10;
164 }
165 if self.account_coding_correct {
166 contribution += 0.10;
167 }
168 if self.description_matches_pattern {
169 contribution += 0.08;
170 }
171 if self.frequency_is_normal {
172 contribution += 0.07;
173 }
174
175 contribution.min(0.40)
176 }
177}
178
/// Describes who took part in an anomaly.
///
/// `difficulty_contribution` turns the participant count and involvement
/// flags into a score capped at 0.35.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CollusionFactors {
    /// Number of participants. Only counts above 1 contribute: 0.08 per
    /// participant beyond the first, for at most three extra participants.
    pub participants: u32,
    /// Adds 0.15 to the collusion contribution when set.
    pub management_involved: bool,
    /// Adds 0.12 to the collusion contribution when set.
    pub it_involved: bool,
    /// Adds 0.10 to the collusion contribution when set.
    pub external_party_involved: bool,
}
191
192impl CollusionFactors {
193 pub fn new() -> Self {
195 Self::default()
196 }
197
198 pub fn with_participants(mut self, count: u32) -> Self {
200 self.participants = count;
201 self
202 }
203
204 pub fn with_management(mut self) -> Self {
206 self.management_involved = true;
207 self
208 }
209
210 pub fn with_it(mut self) -> Self {
212 self.it_involved = true;
213 self
214 }
215
216 pub fn with_external_party(mut self) -> Self {
218 self.external_party_involved = true;
219 self
220 }
221
222 pub fn difficulty_contribution(&self) -> f64 {
224 let mut contribution = 0.0;
225
226 if self.participants > 1 {
228 contribution += (self.participants as f64 - 1.0).min(3.0) * 0.08;
229 }
230
231 if self.management_involved {
232 contribution += 0.15;
233 }
234 if self.it_involved {
235 contribution += 0.12;
236 }
237 if self.external_party_involved {
238 contribution += 0.10;
239 }
240
241 contribution.min(0.35)
242 }
243}
244
/// Timing-related circumstances of an anomaly.
///
/// `difficulty_contribution` turns the flags and the anomaly age into a score
/// capped at 0.30.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TemporalFactors {
    /// Adds 0.08 to the temporal contribution when set.
    pub high_volume_period: bool,
    /// Adds 0.10 to the temporal contribution when set.
    pub staff_transition_period: bool,
    /// Adds 0.12 to the temporal contribution when set.
    pub cross_period: bool,
    /// Age in days. Values above 30 add `(days - 30) / 365`, limited to 0.15.
    pub days_since_anomaly: u32,
}
257
258impl TemporalFactors {
259 pub fn new() -> Self {
261 Self::default()
262 }
263
264 pub fn with_high_volume(mut self) -> Self {
266 self.high_volume_period = true;
267 self
268 }
269
270 pub fn with_staff_transition(mut self) -> Self {
272 self.staff_transition_period = true;
273 self
274 }
275
276 pub fn with_cross_period(mut self) -> Self {
278 self.cross_period = true;
279 self
280 }
281
282 pub fn with_age(mut self, days: u32) -> Self {
284 self.days_since_anomaly = days;
285 self
286 }
287
288 pub fn difficulty_contribution(&self) -> f64 {
290 let mut contribution = 0.0;
291
292 if self.high_volume_period {
293 contribution += 0.08;
294 }
295 if self.staff_transition_period {
296 contribution += 0.10;
297 }
298 if self.cross_period {
299 contribution += 0.12;
300 }
301
302 if self.days_since_anomaly > 30 {
304 contribution += ((self.days_since_anomaly as f64 - 30.0) / 365.0).min(0.15);
305 }
306
307 contribution.min(0.30)
308 }
309}
310
/// Amount-related characteristics of an anomaly.
///
/// `difficulty_contribution` turns the flags and the distance from the mean
/// into a score capped at 0.35.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AmountFactors {
    /// Adds 0.12 to the amount contribution when set.
    pub near_common_amount: bool,
    /// Adds 0.05 to the amount contribution when set.
    pub just_below_threshold: bool,
    /// Adds 0.15 to the amount contribution when set.
    pub small_relative_percentage: bool,
    /// Standard deviations from the mean. Values below 2.0 add to the amount
    /// contribution; the derived default is 0.0.
    pub std_deviations_from_mean: f64,
}
323
324impl AmountFactors {
325 pub fn new() -> Self {
327 Self::default()
328 }
329
330 pub fn with_common_amount(mut self) -> Self {
332 self.near_common_amount = true;
333 self
334 }
335
336 pub fn just_below_threshold(mut self) -> Self {
338 self.just_below_threshold = true;
339 self
340 }
341
342 pub fn with_small_percentage(mut self) -> Self {
344 self.small_relative_percentage = true;
345 self
346 }
347
348 pub fn with_std_devs(mut self, std_devs: f64) -> Self {
350 self.std_deviations_from_mean = std_devs;
351 self
352 }
353
354 pub fn difficulty_contribution(&self) -> f64 {
356 let mut contribution = 0.0;
357
358 if self.near_common_amount {
359 contribution += 0.12;
360 }
361 if self.just_below_threshold {
362 contribution += 0.05; }
364 if self.small_relative_percentage {
365 contribution += 0.15;
366 }
367
368 if self.std_deviations_from_mean < 2.0 {
370 contribution += 0.10 * (2.0 - self.std_deviations_from_mean).max(0.0);
371 }
372
373 contribution.min(0.35)
374 }
375}
376
/// Bundle of the five factor groups consumed by
/// `DifficultyCalculator::compute_difficulty_score`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DifficultyFactors {
    /// Concealment factors.
    pub concealment: ConcealmentFactors,
    /// Blending factors.
    pub blending: BlendingFactors,
    /// Collusion factors.
    pub collusion: CollusionFactors,
    /// Temporal factors.
    pub temporal: TemporalFactors,
    /// Amount factors.
    pub amount: AmountFactors,
}
391
392impl DifficultyFactors {
393 pub fn new() -> Self {
395 Self::default()
396 }
397
398 pub fn with_concealment(mut self, concealment: ConcealmentFactors) -> Self {
400 self.concealment = concealment;
401 self
402 }
403
404 pub fn with_blending(mut self, blending: BlendingFactors) -> Self {
406 self.blending = blending;
407 self
408 }
409
410 pub fn with_collusion(mut self, collusion: CollusionFactors) -> Self {
412 self.collusion = collusion;
413 self
414 }
415
416 pub fn with_temporal(mut self, temporal: TemporalFactors) -> Self {
418 self.temporal = temporal;
419 self
420 }
421
422 pub fn with_amount(mut self, amount: AmountFactors) -> Self {
424 self.amount = amount;
425 self
426 }
427}
428
/// Computes detection-difficulty scores and levels for labeled anomalies.
#[derive(Debug, Clone)]
pub struct DifficultyCalculator {
    /// Base difficulty keyed by anomaly type name, as returned by
    /// `anomaly_type.type_name()`.
    type_base_difficulty: std::collections::HashMap<String, f64>,
}
435
436impl Default for DifficultyCalculator {
437 fn default() -> Self {
438 Self::new()
439 }
440}
441
442impl DifficultyCalculator {
443 pub fn new() -> Self {
445 let mut type_base_difficulty = std::collections::HashMap::new();
446
447 type_base_difficulty.insert("FictitiousEntry".to_string(), 0.30);
449 type_base_difficulty.insert("FictitiousTransaction".to_string(), 0.30);
450 type_base_difficulty.insert("FictitiousVendor".to_string(), 0.40);
451 type_base_difficulty.insert("SelfApproval".to_string(), 0.15);
452 type_base_difficulty.insert("SegregationOfDutiesViolation".to_string(), 0.20);
453 type_base_difficulty.insert("DuplicatePayment".to_string(), 0.10);
454 type_base_difficulty.insert("Kickback".to_string(), 0.50);
455 type_base_difficulty.insert("KickbackScheme".to_string(), 0.50);
456 type_base_difficulty.insert("RevenueManipulation".to_string(), 0.45);
457 type_base_difficulty.insert("CollusiveApproval".to_string(), 0.55);
458
459 type_base_difficulty.insert("DuplicateEntry".to_string(), 0.05);
461 type_base_difficulty.insert("ReversedAmount".to_string(), 0.10);
462 type_base_difficulty.insert("WrongPeriod".to_string(), 0.20);
463 type_base_difficulty.insert("MissingField".to_string(), 0.05);
464 type_base_difficulty.insert("UnbalancedEntry".to_string(), 0.03);
465
466 type_base_difficulty.insert("SkippedApproval".to_string(), 0.15);
468 type_base_difficulty.insert("LatePosting".to_string(), 0.12);
469 type_base_difficulty.insert("ManualOverride".to_string(), 0.25);
470
471 type_base_difficulty.insert("UnusuallyHighAmount".to_string(), 0.15);
473 type_base_difficulty.insert("BenfordViolation".to_string(), 0.25);
474 type_base_difficulty.insert("TrendBreak".to_string(), 0.30);
475
476 type_base_difficulty.insert("CircularTransaction".to_string(), 0.40);
478 type_base_difficulty.insert("CircularIntercompany".to_string(), 0.45);
479
480 Self {
481 type_base_difficulty,
482 }
483 }
484
485 pub fn calculate(&self, anomaly: &LabeledAnomaly) -> AnomalyDetectionDifficulty {
487 let score = self.compute_difficulty_score(anomaly, &DifficultyFactors::default());
488 AnomalyDetectionDifficulty::from_score(score)
489 }
490
491 pub fn calculate_with_factors(
493 &self,
494 anomaly: &LabeledAnomaly,
495 factors: &DifficultyFactors,
496 ) -> AnomalyDetectionDifficulty {
497 let score = self.compute_difficulty_score(anomaly, factors);
498 AnomalyDetectionDifficulty::from_score(score)
499 }
500
501 pub fn compute_difficulty_score(
503 &self,
504 anomaly: &LabeledAnomaly,
505 factors: &DifficultyFactors,
506 ) -> f64 {
507 let type_name = anomaly.anomaly_type.type_name();
509 let base_difficulty = *self.type_base_difficulty.get(&type_name).unwrap_or(&0.25);
510
511 let concealment_contribution = factors.concealment.difficulty_contribution();
513 let blending_contribution = factors.blending.difficulty_contribution();
514 let collusion_contribution = factors.collusion.difficulty_contribution();
515 let temporal_contribution = factors.temporal.difficulty_contribution();
516 let amount_contribution = factors.amount.difficulty_contribution();
517
518 let total_contribution = concealment_contribution
520 + blending_contribution
521 + collusion_contribution
522 + temporal_contribution
523 + amount_contribution;
524
525 let score = base_difficulty * 0.4 + total_contribution * 0.6;
527
528 score.clamp(0.0, 1.0)
530 }
531
532 pub fn recommended_methods(
534 &self,
535 difficulty: AnomalyDetectionDifficulty,
536 ) -> Vec<DetectionMethod> {
537 match difficulty {
538 AnomalyDetectionDifficulty::Trivial => vec![DetectionMethod::RuleBased],
539 AnomalyDetectionDifficulty::Easy => {
540 vec![DetectionMethod::RuleBased, DetectionMethod::Statistical]
541 }
542 AnomalyDetectionDifficulty::Moderate => vec![
543 DetectionMethod::Statistical,
544 DetectionMethod::MachineLearning,
545 ],
546 AnomalyDetectionDifficulty::Hard => vec![
547 DetectionMethod::MachineLearning,
548 DetectionMethod::GraphBased,
549 ],
550 AnomalyDetectionDifficulty::Expert => vec![
551 DetectionMethod::GraphBased,
552 DetectionMethod::ForensicAudit,
553 DetectionMethod::Hybrid,
554 ],
555 }
556 }
557
558 pub fn infer_factors(&self, anomaly: &LabeledAnomaly) -> DifficultyFactors {
560 let mut factors = DifficultyFactors::default();
561
562 match &anomaly.anomaly_type {
564 AnomalyType::Fraud(fraud_type) => {
565 if matches!(
567 fraud_type,
568 FraudType::CollusiveApproval | FraudType::KickbackScheme | FraudType::Kickback
569 ) {
570 factors.collusion = factors.collusion.with_participants(2);
571 }
572
573 if matches!(
575 fraud_type,
576 FraudType::FictitiousEntry
577 | FraudType::FictitiousVendor
578 | FraudType::InvoiceManipulation
579 ) {
580 factors.concealment = factors.concealment.with_document_manipulation();
581 }
582
583 if matches!(
585 fraud_type,
586 FraudType::JustBelowThreshold | FraudType::SplitTransaction
587 ) {
588 factors.concealment = factors.concealment.with_splitting();
589 factors.amount = factors.amount.just_below_threshold();
590 }
591
592 if matches!(fraud_type, FraudType::TimingAnomaly) {
594 factors.concealment = factors.concealment.with_timing_exploitation();
595 }
596 }
597 AnomalyType::Error(_) => {
598 }
600 AnomalyType::ProcessIssue(process_type) => {
601 use datasynth_core::models::ProcessIssueType;
602 if matches!(process_type, ProcessIssueType::SkippedApproval) {
603 factors.concealment = factors.concealment.with_approval_circumvention();
604 }
605 if matches!(
606 process_type,
607 ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting
608 ) {
609 factors.concealment = factors.concealment.with_timing_exploitation();
610 }
611 }
612 _ => {}
613 }
614
615 if anomaly.metadata.contains_key("collusion") {
617 factors.collusion = factors.collusion.with_participants(2);
618 }
619 if anomaly.metadata.contains_key("management_override") {
620 factors.collusion = factors.collusion.with_management();
621 }
622
623 factors
624 }
625}
626
/// Result record combining a difficulty level with its supporting details.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DifficultyAssessment {
    /// Difficulty level.
    pub difficulty: AnomalyDetectionDifficulty,
    /// Numeric score supplied by the caller of `new`.
    pub score: f64,
    /// Factors supplied by the caller of `new`.
    pub factors: DifficultyFactors,
    /// Detection methods supplied by the caller of `new`.
    pub recommended_methods: Vec<DetectionMethod>,
    /// Filled by `new` from `difficulty.expected_detection_rate()`.
    pub expected_detection_rate: f64,
    /// Free-text indicators appended through `with_indicator`.
    pub key_indicators: Vec<String>,
}
643
644impl DifficultyAssessment {
645 pub fn new(
647 difficulty: AnomalyDetectionDifficulty,
648 score: f64,
649 factors: DifficultyFactors,
650 methods: Vec<DetectionMethod>,
651 ) -> Self {
652 Self {
653 expected_detection_rate: difficulty.expected_detection_rate(),
654 difficulty,
655 score,
656 factors,
657 recommended_methods: methods,
658 key_indicators: Vec::new(),
659 }
660 }
661
662 pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
664 self.key_indicators.push(indicator.into());
665 self
666 }
667}
668
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::ErrorType;

    /// Builds a labeled anomaly of the given type with fixed identifiers and date.
    fn create_test_anomaly(anomaly_type: AnomalyType) -> LabeledAnomaly {
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        LabeledAnomaly::new(
            String::from("ANO001"),
            anomaly_type,
            String::from("JE001"),
            String::from("JE"),
            String::from("1000"),
            date,
        )
    }

    #[test]
    fn test_concealment_factors() {
        // 0.20 (document manipulation) + 0.15 (splitting) = 0.35.
        let score = ConcealmentFactors::new()
            .with_document_manipulation()
            .with_splitting()
            .difficulty_contribution();

        assert!(score > 0.3);
        assert!(score <= 0.5);
    }

    #[test]
    fn test_blending_factors() {
        // 0.15 + 0.10 + 0.10 = 0.35, below the 0.40 cap.
        let score = BlendingFactors::new()
            .with_normal_amount()
            .with_normal_timing()
            .with_established_counterparty()
            .difficulty_contribution();

        assert!(score > 0.3);
        assert!(score <= 0.4);
    }

    #[test]
    fn test_collusion_factors() {
        // Two extra participants (0.16) plus management (0.15) = 0.31.
        let score = CollusionFactors::new()
            .with_participants(3)
            .with_management()
            .difficulty_contribution();

        assert!(score > 0.3);
    }

    #[test]
    fn test_difficulty_calculator_basic() {
        let calc = DifficultyCalculator::new();

        let duplicate_entry = create_test_anomaly(AnomalyType::Error(ErrorType::DuplicateEntry));
        let error_level = calc.calculate(&duplicate_entry);
        assert!(matches!(
            error_level,
            AnomalyDetectionDifficulty::Trivial | AnomalyDetectionDifficulty::Easy
        ));

        let kickback = create_test_anomaly(AnomalyType::Fraud(FraudType::KickbackScheme));
        let fraud_level = calc.calculate(&kickback);
        assert!(matches!(
            fraud_level,
            AnomalyDetectionDifficulty::Easy | AnomalyDetectionDifficulty::Moderate
        ));
    }

    #[test]
    fn test_difficulty_with_factors() {
        let calc = DifficultyCalculator::new();
        let vendor_fraud = create_test_anomaly(AnomalyType::Fraud(FraudType::FictitiousVendor));

        let baseline = calc.calculate(&vendor_fraud);

        let concealment = ConcealmentFactors::new()
            .with_document_manipulation()
            .with_technique(ConcealmentTechnique::Collusion);
        let collusion = CollusionFactors::new()
            .with_participants(2)
            .with_management();
        let factors = DifficultyFactors::new()
            .with_concealment(concealment)
            .with_collusion(collusion);

        let enhanced = calc.calculate_with_factors(&vendor_fraud, &factors);

        // Extra factors must never lower the difficulty.
        assert!(enhanced.difficulty_score() >= baseline.difficulty_score());
    }

    #[test]
    fn test_recommended_methods() {
        let calc = DifficultyCalculator::new();

        let for_trivial = calc.recommended_methods(AnomalyDetectionDifficulty::Trivial);
        assert!(for_trivial.contains(&DetectionMethod::RuleBased));

        let for_expert = calc.recommended_methods(AnomalyDetectionDifficulty::Expert);
        assert!(for_expert.contains(&DetectionMethod::ForensicAudit));
    }

    #[test]
    fn test_infer_factors() {
        let calc = DifficultyCalculator::new();

        let kickback = create_test_anomaly(AnomalyType::Fraud(FraudType::KickbackScheme));
        let kickback_factors = calc.infer_factors(&kickback);
        assert!(kickback_factors.collusion.participants >= 2);

        let fictitious = create_test_anomaly(AnomalyType::Fraud(FraudType::FictitiousEntry));
        let fictitious_factors = calc.infer_factors(&fictitious);
        assert!(fictitious_factors.concealment.document_manipulation);
    }

    #[test]
    fn test_difficulty_assessment() {
        let methods = vec![
            DetectionMethod::GraphBased,
            DetectionMethod::MachineLearning,
        ];
        let assessment = DifficultyAssessment::new(
            AnomalyDetectionDifficulty::Hard,
            0.72,
            DifficultyFactors::default(),
            methods,
        )
        .with_indicator("Complex vendor network")
        .with_indicator("Cross-entity payments");

        assert_eq!(assessment.difficulty, AnomalyDetectionDifficulty::Hard);
        assert_eq!(assessment.key_indicators.len(), 2);
        assert!((assessment.expected_detection_rate - 0.40).abs() < 0.01);
    }
}