Skip to main content

datasynth_generators/anomaly/
confidence.rs

1//! Confidence calculation for anomaly detection.
2//!
3//! This module provides dynamic confidence scoring based on multiple factors:
4//! - Pattern clarity (how clear is the anomalous pattern)
5//! - Anomaly strength (magnitude of deviation)
6//! - Detectability (automated detection likelihood)
7//! - Context match (supporting evidence)
8
9use datasynth_core::models::{
10    AnomalyType, ContributingFactor, ErrorType, FactorType, FraudType, ProcessIssueType,
11    RelationalAnomalyType, StatisticalAnomalyType,
12};
13use rust_decimal::Decimal;
14
15/// Configuration for confidence calculation.
16#[derive(Debug, Clone)]
17pub struct ConfidenceConfig {
18    /// Weight for pattern clarity component.
19    pub pattern_clarity_weight: f64,
20    /// Weight for anomaly strength component.
21    pub strength_weight: f64,
22    /// Weight for detectability component.
23    pub detectability_weight: f64,
24    /// Weight for context match component.
25    pub context_weight: f64,
26    /// Materiality threshold for amount-based anomalies.
27    pub materiality_threshold: Decimal,
28}
29
30impl Default for ConfidenceConfig {
31    fn default() -> Self {
32        Self {
33            pattern_clarity_weight: 0.30,
34            strength_weight: 0.25,
35            detectability_weight: 0.25,
36            context_weight: 0.20,
37            materiality_threshold: Decimal::new(10000, 0), // 10,000
38        }
39    }
40}
41
42impl ConfidenceConfig {
43    /// Validates that weights sum to 1.0.
44    pub fn validate(&self) -> Result<(), String> {
45        let sum = self.pattern_clarity_weight
46            + self.strength_weight
47            + self.detectability_weight
48            + self.context_weight;
49
50        if (sum - 1.0).abs() > 0.01 {
51            return Err(format!("Confidence weights must sum to 1.0, got {}", sum));
52        }
53
54        Ok(())
55    }
56}
57
58/// Context for confidence calculation.
59#[derive(Debug, Clone, Default)]
60pub struct ConfidenceContext {
61    /// Amount involved in the anomaly.
62    pub amount: Option<Decimal>,
63    /// Normal/expected amount for comparison.
64    pub expected_amount: Option<Decimal>,
65    /// Number of similar anomalies previously detected.
66    pub prior_anomaly_count: usize,
67    /// Entity risk score (0.0 - 1.0).
68    pub entity_risk_score: f64,
69    /// Whether the anomaly was detected by automated rules.
70    pub auto_detected: bool,
71    /// Number of supporting evidence items.
72    pub evidence_count: usize,
73    /// Pattern match confidence (0.0 - 1.0).
74    pub pattern_confidence: f64,
75    /// Time-based anomaly indicators.
76    pub timing_score: f64,
77}
78
79/// Calculator for anomaly confidence scores.
80#[derive(Debug, Clone)]
81pub struct ConfidenceCalculator {
82    config: ConfidenceConfig,
83}
84
85impl ConfidenceCalculator {
86    /// Creates a new confidence calculator with default config.
87    pub fn new() -> Self {
88        Self {
89            config: ConfidenceConfig::default(),
90        }
91    }
92
93    /// Creates a new confidence calculator with custom config.
94    pub fn with_config(config: ConfidenceConfig) -> Self {
95        Self { config }
96    }
97
98    /// Calculates confidence score for an anomaly.
99    ///
100    /// Returns a tuple of (confidence_score, contributing_factors).
101    pub fn calculate(
102        &self,
103        anomaly_type: &AnomalyType,
104        context: &ConfidenceContext,
105    ) -> (f64, Vec<ContributingFactor>) {
106        let mut factors = Vec::new();
107
108        // Component 1: Pattern Clarity
109        let pattern_clarity = self.calculate_pattern_clarity(anomaly_type, context);
110        factors.push(ContributingFactor::new(
111            FactorType::PatternMatch,
112            pattern_clarity,
113            0.5, // Threshold for "clear" pattern
114            true,
115            self.config.pattern_clarity_weight,
116            &format!("Pattern clarity score: {:.2}", pattern_clarity),
117        ));
118
119        // Component 2: Anomaly Strength
120        let strength = self.calculate_anomaly_strength(anomaly_type, context);
121        factors.push(ContributingFactor::new(
122            FactorType::AmountDeviation,
123            strength,
124            0.3, // Threshold for "strong" anomaly
125            true,
126            self.config.strength_weight,
127            &format!("Anomaly strength: {:.2}", strength),
128        ));
129
130        // Component 3: Detectability
131        let detectability = self.calculate_detectability(anomaly_type, context);
132        factors.push(ContributingFactor::new(
133            FactorType::PatternMatch,
134            detectability,
135            0.5,
136            true,
137            self.config.detectability_weight,
138            &format!("Auto-detectability: {:.2}", detectability),
139        ));
140
141        // Component 4: Context Match
142        let context_match = self.calculate_context_match(context);
143        factors.push(ContributingFactor::new(
144            FactorType::EntityRisk,
145            context_match,
146            0.3,
147            true,
148            self.config.context_weight,
149            &format!("Context match score: {:.2}", context_match),
150        ));
151
152        // Calculate weighted sum
153        let confidence = pattern_clarity * self.config.pattern_clarity_weight
154            + strength * self.config.strength_weight
155            + detectability * self.config.detectability_weight
156            + context_match * self.config.context_weight;
157
158        (confidence.clamp(0.0, 1.0), factors)
159    }
160
161    /// Calculates pattern clarity based on anomaly type.
162    fn calculate_pattern_clarity(
163        &self,
164        anomaly_type: &AnomalyType,
165        context: &ConfidenceContext,
166    ) -> f64 {
167        // Base clarity from anomaly type
168        let base_clarity = match anomaly_type {
169            AnomalyType::Fraud(fraud_type) => match fraud_type {
170                FraudType::DuplicatePayment => 0.95, // Very clear pattern
171                FraudType::SelfApproval => 0.90,
172                FraudType::SegregationOfDutiesViolation => 0.85,
173                FraudType::JustBelowThreshold => 0.80,
174                FraudType::RoundDollarManipulation => 0.70,
175                FraudType::FictitiousVendor => 0.60, // Requires investigation
176                FraudType::CollusiveApproval => 0.50, // Hard to detect
177                _ => 0.65,
178            },
179            AnomalyType::Error(error_type) => match error_type {
180                ErrorType::DuplicateEntry => 0.95,
181                ErrorType::ReversedAmount => 0.90,
182                ErrorType::UnbalancedEntry => 0.95,
183                ErrorType::MissingField => 0.85,
184                _ => 0.75,
185            },
186            AnomalyType::ProcessIssue(process_type) => match process_type {
187                ProcessIssueType::SkippedApproval => 0.90,
188                ProcessIssueType::MissingDocumentation => 0.85,
189                ProcessIssueType::ManualOverride => 0.80,
190                _ => 0.70,
191            },
192            AnomalyType::Statistical(stat_type) => match stat_type {
193                StatisticalAnomalyType::BenfordViolation => 0.75,
194                StatisticalAnomalyType::StatisticalOutlier => 0.70,
195                StatisticalAnomalyType::UnusuallyHighAmount => 0.65,
196                _ => 0.60,
197            },
198            AnomalyType::Relational(rel_type) => match rel_type {
199                RelationalAnomalyType::CircularTransaction => 0.85,
200                RelationalAnomalyType::DormantAccountActivity => 0.80,
201                _ => 0.65,
202            },
203            AnomalyType::Custom(_) => 0.50,
204        };
205
206        // Adjust based on pattern confidence from context
207        let adjusted = base_clarity * 0.7 + context.pattern_confidence * 0.3;
208
209        adjusted.clamp(0.0, 1.0)
210    }
211
212    /// Calculates anomaly strength based on deviation magnitude.
213    fn calculate_anomaly_strength(
214        &self,
215        anomaly_type: &AnomalyType,
216        context: &ConfidenceContext,
217    ) -> f64 {
218        // Amount-based strength
219        let amount_strength =
220            if let (Some(amount), Some(expected)) = (context.amount, context.expected_amount) {
221                let deviation = (amount - expected).abs();
222                let expected_f64: f64 = expected.try_into().unwrap_or(1.0);
223                let deviation_f64: f64 = deviation.try_into().unwrap_or(0.0);
224
225                if expected_f64.abs() > 0.01 {
226                    (deviation_f64 / expected_f64.abs()).min(2.0) / 2.0 // Normalize to [0, 1]
227                } else {
228                    0.5
229                }
230            } else {
231                0.5 // Default when no amount context
232            };
233
234        // Type-based strength modifier
235        let type_modifier = match anomaly_type {
236            AnomalyType::Fraud(_) => 1.2, // Fraud is inherently severe
237            AnomalyType::Statistical(_) => 1.0,
238            AnomalyType::Relational(_) => 1.1,
239            AnomalyType::Error(_) => 0.9,
240            AnomalyType::ProcessIssue(_) => 0.85,
241            AnomalyType::Custom(_) => 1.0,
242        };
243
244        (amount_strength * type_modifier).clamp(0.0, 1.0)
245    }
246
247    /// Calculates detectability based on anomaly type and context.
248    fn calculate_detectability(
249        &self,
250        anomaly_type: &AnomalyType,
251        context: &ConfidenceContext,
252    ) -> f64 {
253        // Base detectability from anomaly type
254        let base_detectability = match anomaly_type {
255            AnomalyType::Error(error_type) => match error_type {
256                ErrorType::UnbalancedEntry => 1.0, // Always detected
257                ErrorType::DuplicateEntry => 0.95,
258                ErrorType::MissingField => 0.90,
259                _ => 0.80,
260            },
261            AnomalyType::Fraud(fraud_type) => match fraud_type {
262                FraudType::DuplicatePayment => 0.90,
263                FraudType::SelfApproval => 0.85,
264                FraudType::JustBelowThreshold => 0.75,
265                FraudType::CollusiveApproval => 0.40, // Hard to auto-detect
266                FraudType::FictitiousVendor => 0.45,
267                _ => 0.60,
268            },
269            AnomalyType::ProcessIssue(_) => 0.70,
270            AnomalyType::Statistical(_) => 0.65,
271            AnomalyType::Relational(_) => 0.55,
272            AnomalyType::Custom(_) => 0.50,
273        };
274
275        // Boost if already auto-detected
276        let auto_detect_boost: f64 = if context.auto_detected { 0.2 } else { 0.0 };
277
278        (base_detectability + auto_detect_boost).clamp(0.0, 1.0)
279    }
280
281    /// Calculates context match score.
282    fn calculate_context_match(&self, context: &ConfidenceContext) -> f64 {
283        let mut score = 0.0;
284
285        // Entity risk contribution
286        score += context.entity_risk_score * 0.4;
287
288        // Prior anomaly count contribution (repeat offenders)
289        let prior_contribution = (context.prior_anomaly_count as f64 / 5.0).min(1.0) * 0.3;
290        score += prior_contribution;
291
292        // Evidence count contribution
293        let evidence_contribution = (context.evidence_count as f64 / 3.0).min(1.0) * 0.2;
294        score += evidence_contribution;
295
296        // Timing score contribution
297        score += context.timing_score * 0.1;
298
299        score.clamp(0.0, 1.0)
300    }
301}
302
303impl Default for ConfidenceCalculator {
304    fn default() -> Self {
305        Self::new()
306    }
307}
308
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;

    /// The score stays in range and one factor is reported per component.
    #[test]
    fn test_confidence_calculator_basic() {
        let calculator = ConfidenceCalculator::new();
        let anomaly_type = AnomalyType::Fraud(FraudType::DuplicatePayment);
        let context = ConfidenceContext::default();

        let (confidence, factors) = calculator.calculate(&anomaly_type, &context);

        assert!((0.0..=1.0).contains(&confidence));
        // Pattern clarity, anomaly strength, detectability, context match.
        assert_eq!(factors.len(), 4);
    }

    /// A large deviation from the expected amount must score higher than no
    /// deviation at all.
    #[test]
    fn test_confidence_with_amount_context() {
        let calculator = ConfidenceCalculator::new();
        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);

        let high_deviation = ConfidenceContext {
            amount: Some(dec!(100000)),
            expected_amount: Some(dec!(10000)),
            ..Default::default()
        };

        // Baseline: observed amount equals the expected amount.
        let no_deviation = ConfidenceContext {
            amount: Some(dec!(10000)),
            expected_amount: Some(dec!(10000)),
            ..Default::default()
        };

        let (confidence, _) = calculator.calculate(&anomaly_type, &high_deviation);
        let (baseline, _) = calculator.calculate(&anomaly_type, &no_deviation);

        // High deviation should increase confidence
        assert!(confidence > 0.3);
        assert!(confidence > baseline);
    }

    /// A riskier entity with a history of anomalies scores higher.
    #[test]
    fn test_confidence_with_entity_risk() {
        let calculator = ConfidenceCalculator::new();
        let anomaly_type = AnomalyType::Fraud(FraudType::FictitiousVendor);

        let low_risk_context = ConfidenceContext {
            entity_risk_score: 0.1,
            ..Default::default()
        };

        let high_risk_context = ConfidenceContext {
            entity_risk_score: 0.9,
            prior_anomaly_count: 5,
            ..Default::default()
        };

        let (low_confidence, _) = calculator.calculate(&anomaly_type, &low_risk_context);
        let (high_confidence, _) = calculator.calculate(&anomaly_type, &high_risk_context);

        // High risk entity should have higher confidence
        assert!(high_confidence > low_confidence);
    }

    /// The default weights validate; weights summing to 2.0 do not.
    #[test]
    fn test_config_validation() {
        let valid_config = ConfidenceConfig::default();
        assert!(valid_config.validate().is_ok());

        let invalid_config = ConfidenceConfig {
            pattern_clarity_weight: 0.5,
            strength_weight: 0.5,
            detectability_weight: 0.5,
            context_weight: 0.5, // Sum = 2.0
            ..Default::default()
        };
        assert!(invalid_config.validate().is_err());
    }

    /// An anomaly already flagged by automated rules scores higher.
    #[test]
    fn test_auto_detected_boost() {
        let calculator = ConfidenceCalculator::new();
        let anomaly_type = AnomalyType::Error(ErrorType::DuplicateEntry);

        let not_detected = ConfidenceContext {
            auto_detected: false,
            ..Default::default()
        };

        let detected = ConfidenceContext {
            auto_detected: true,
            ..Default::default()
        };

        let (conf_not, _) = calculator.calculate(&anomaly_type, &not_detected);
        let (conf_detected, _) = calculator.calculate(&anomaly_type, &detected);

        // Auto-detected should have higher confidence
        assert!(conf_detected > conf_not);
    }
}
402}