Skip to main content

datasynth_generators/anomaly/correlation/
co_occurrence.rs

1//! Anomaly co-occurrence patterns.
2//!
3//! Defines patterns where certain anomalies tend to appear together,
4//! such as fraud concealment patterns where a fictitious vendor
5//! is typically accompanied by document manipulation and approval bypass.
6
7use rand::Rng;
8use serde::{Deserialize, Serialize};
9
10use datasynth_core::models::AnomalyType;
11
12/// A correlated anomaly that tends to occur with a primary anomaly.
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CorrelatedAnomaly {
15    /// The correlated anomaly type.
16    pub anomaly_type: AnomalyType,
17    /// Probability of this anomaly occurring given the primary (0.0-1.0).
18    pub probability: f64,
19    /// Minimum lag in days from the primary anomaly.
20    pub lag_days_min: i32,
21    /// Maximum lag in days from the primary anomaly.
22    pub lag_days_max: i32,
23    /// Whether this anomaly targets the same entity.
24    pub same_entity: bool,
25    /// Description of the correlation.
26    pub description: String,
27}
28
29impl CorrelatedAnomaly {
30    /// Creates a new correlated anomaly.
31    pub fn new(anomaly_type: AnomalyType, probability: f64, lag_range: (i32, i32)) -> Self {
32        Self {
33            anomaly_type,
34            probability: probability.clamp(0.0, 1.0),
35            lag_days_min: lag_range.0,
36            lag_days_max: lag_range.1,
37            same_entity: true,
38            description: String::new(),
39        }
40    }
41
42    /// Sets whether the correlated anomaly targets the same entity.
43    pub fn with_same_entity(mut self, same: bool) -> Self {
44        self.same_entity = same;
45        self
46    }
47
48    /// Sets the description.
49    pub fn with_description(mut self, description: impl Into<String>) -> Self {
50        self.description = description.into();
51        self
52    }
53
54    /// Generates a random lag within the range.
55    pub fn random_lag<R: Rng>(&self, rng: &mut R) -> i32 {
56        if self.lag_days_min == self.lag_days_max {
57            return self.lag_days_min;
58        }
59        rng.gen_range(self.lag_days_min..=self.lag_days_max)
60    }
61
62    /// Returns whether this anomaly should be triggered.
63    pub fn should_trigger<R: Rng>(&self, rng: &mut R) -> bool {
64        rng.gen::<f64>() < self.probability
65    }
66}
67
68/// A co-occurrence pattern defining which anomalies tend to appear together.
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct CoOccurrencePattern {
71    /// Name of the pattern.
72    pub name: String,
73    /// Description of when this pattern applies.
74    pub description: String,
75    /// The primary/triggering anomaly type.
76    pub primary: AnomalyType,
77    /// Correlated anomalies that may occur with the primary.
78    pub correlated: Vec<CorrelatedAnomaly>,
79    /// Whether this pattern is currently active.
80    pub enabled: bool,
81}
82
83impl CoOccurrencePattern {
84    /// Creates a new co-occurrence pattern.
85    pub fn new(name: impl Into<String>, primary: AnomalyType) -> Self {
86        Self {
87            name: name.into(),
88            description: String::new(),
89            primary,
90            correlated: Vec::new(),
91            enabled: true,
92        }
93    }
94
95    /// Sets the description.
96    pub fn with_description(mut self, description: impl Into<String>) -> Self {
97        self.description = description.into();
98        self
99    }
100
101    /// Adds a correlated anomaly.
102    pub fn with_correlated(mut self, correlated: CorrelatedAnomaly) -> Self {
103        self.correlated.push(correlated);
104        self
105    }
106
107    /// Sets whether the pattern is enabled.
108    pub fn with_enabled(mut self, enabled: bool) -> Self {
109        self.enabled = enabled;
110        self
111    }
112
113    /// Checks if this pattern matches a given anomaly type.
114    pub fn matches(&self, anomaly_type: &AnomalyType) -> bool {
115        self.enabled && self.primary == *anomaly_type
116    }
117
118    /// Gets correlated anomalies that should be triggered.
119    pub fn get_triggered_correlations<R: Rng>(&self, rng: &mut R) -> Vec<&CorrelatedAnomaly> {
120        self.correlated
121            .iter()
122            .filter(|c| c.should_trigger(rng))
123            .collect()
124    }
125}
126
127/// Manages co-occurrence patterns for anomaly injection.
128#[derive(Debug, Clone)]
129pub struct AnomalyCoOccurrence {
130    /// All registered patterns.
131    patterns: Vec<CoOccurrencePattern>,
132}
133
134impl Default for AnomalyCoOccurrence {
135    fn default() -> Self {
136        Self::new()
137    }
138}
139
140impl AnomalyCoOccurrence {
141    /// Creates a new co-occurrence manager with default patterns.
142    pub fn new() -> Self {
143        Self {
144            patterns: Self::default_patterns(),
145        }
146    }
147
148    /// Creates default fraud-related co-occurrence patterns.
149    fn default_patterns() -> Vec<CoOccurrencePattern> {
150        use datasynth_core::models::{ErrorType, FraudType, ProcessIssueType};
151
152        vec![
153            // Fraud concealment pattern
154            CoOccurrencePattern::new(
155                "fraud_concealment",
156                AnomalyType::Fraud(FraudType::FictitiousVendor),
157            )
158            .with_description("Fictitious vendor fraud typically involves document manipulation and approval bypass")
159            .with_correlated(
160                CorrelatedAnomaly::new(
161                    AnomalyType::Fraud(FraudType::InvoiceManipulation),
162                    0.80,
163                    (0, 30),
164                )
165                .with_description("Document manipulation to support fictitious vendor"),
166            )
167            .with_correlated(
168                CorrelatedAnomaly::new(
169                    AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
170                    0.60,
171                    (0, 15),
172                )
173                .with_description("Approval bypass to expedite fraudulent payments"),
174            )
175            .with_correlated(
176                CorrelatedAnomaly::new(
177                    AnomalyType::Fraud(FraudType::DuplicatePayment),
178                    0.30,
179                    (15, 60),
180                )
181                .with_same_entity(true)
182                .with_description("Multiple payments to the fictitious vendor"),
183            ),
184
185            // Error cascade pattern
186            CoOccurrencePattern::new(
187                "error_cascade",
188                AnomalyType::Error(ErrorType::MisclassifiedAccount),
189            )
190            .with_description("Account misclassification leads to reconciliation issues and corrections")
191            .with_correlated(
192                CorrelatedAnomaly::new(
193                    AnomalyType::Error(ErrorType::DuplicateEntry),
194                    0.40,
195                    (1, 10),
196                )
197                .with_description("Attempt to correct misclassification creates duplicate"),
198            )
199            .with_correlated(
200                CorrelatedAnomaly::new(
201                    AnomalyType::Error(ErrorType::WrongPeriod),
202                    0.30,
203                    (5, 30),
204                )
205                .with_description("Correction posted to wrong period"),
206            ),
207
208            // Process breakdown pattern
209            CoOccurrencePattern::new(
210                "process_breakdown",
211                AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
212            )
213            .with_description("Skipped approvals often accompanied by other control bypasses")
214            .with_correlated(
215                CorrelatedAnomaly::new(
216                    AnomalyType::Fraud(FraudType::SplitTransaction),
217                    0.50,
218                    (0, 7),
219                )
220                .with_description("Transaction splitting to avoid threshold"),
221            )
222            .with_correlated(
223                CorrelatedAnomaly::new(
224                    AnomalyType::ProcessIssue(ProcessIssueType::LatePosting),
225                    0.40,
226                    (0, 5),
227                )
228                .with_description("Late posting to avoid immediate detection"),
229            ),
230
231            // Kickback concealment pattern
232            CoOccurrencePattern::new(
233                "kickback_concealment",
234                AnomalyType::Fraud(FraudType::Kickback),
235            )
236            .with_description("Kickback schemes involve price inflation and approval manipulation")
237            .with_correlated(
238                CorrelatedAnomaly::new(
239                    AnomalyType::Fraud(FraudType::InvoiceManipulation),
240                    0.85,
241                    (0, 14),
242                )
243                .with_description("Invoice price inflation"),
244            )
245            .with_correlated(
246                CorrelatedAnomaly::new(
247                    AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation),
248                    0.45,
249                    (0, 30),
250                )
251                .with_description("SoD violation to approve own vendor"),
252            ),
253
254            // Revenue manipulation concealment
255            CoOccurrencePattern::new(
256                "revenue_manipulation_concealment",
257                AnomalyType::Fraud(FraudType::RevenueManipulation),
258            )
259            .with_description("Revenue manipulation often involves expense deferral and reserve manipulation")
260            .with_correlated(
261                CorrelatedAnomaly::new(
262                    AnomalyType::Fraud(FraudType::ImproperCapitalization),
263                    0.60,
264                    (0, 30),
265                )
266                .with_description("Capitalize expenses to boost current period income"),
267            )
268            .with_correlated(
269                CorrelatedAnomaly::new(
270                    AnomalyType::Fraud(FraudType::ReserveManipulation),
271                    0.50,
272                    (30, 90),
273                )
274                .with_description("Release reserves to meet targets"),
275            ),
276        ]
277    }
278
279    /// Adds a custom pattern.
280    pub fn add_pattern(&mut self, pattern: CoOccurrencePattern) {
281        self.patterns.push(pattern);
282    }
283
284    /// Gets patterns that match a given anomaly type.
285    pub fn get_matching_patterns(&self, anomaly_type: &AnomalyType) -> Vec<&CoOccurrencePattern> {
286        self.patterns
287            .iter()
288            .filter(|p| p.matches(anomaly_type))
289            .collect()
290    }
291
292    /// Gets correlated anomalies for a given primary anomaly.
293    pub fn get_correlated_anomalies<R: Rng>(
294        &self,
295        anomaly_type: &AnomalyType,
296        rng: &mut R,
297    ) -> Vec<CorrelatedAnomalyResult> {
298        let mut results = Vec::new();
299
300        for pattern in self.get_matching_patterns(anomaly_type) {
301            for correlated in pattern.get_triggered_correlations(rng) {
302                let lag = correlated.random_lag(rng);
303                results.push(CorrelatedAnomalyResult {
304                    pattern_name: pattern.name.clone(),
305                    anomaly_type: correlated.anomaly_type.clone(),
306                    lag_days: lag,
307                    same_entity: correlated.same_entity,
308                    description: correlated.description.clone(),
309                });
310            }
311        }
312
313        results
314    }
315
316    /// Returns all registered patterns.
317    pub fn patterns(&self) -> &[CoOccurrencePattern] {
318        &self.patterns
319    }
320
321    /// Enables or disables a pattern by name.
322    pub fn set_pattern_enabled(&mut self, name: &str, enabled: bool) {
323        for pattern in &mut self.patterns {
324            if pattern.name == name {
325                pattern.enabled = enabled;
326                break;
327            }
328        }
329    }
330}
331
332/// Result of a correlated anomaly check.
333#[derive(Debug, Clone)]
334pub struct CorrelatedAnomalyResult {
335    /// Pattern that triggered this.
336    pub pattern_name: String,
337    /// Anomaly type to inject.
338    pub anomaly_type: AnomalyType,
339    /// Days after the primary anomaly.
340    pub lag_days: i32,
341    /// Whether to target the same entity.
342    pub same_entity: bool,
343    /// Description of the correlation.
344    pub description: String,
345}
346
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::FraudType;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// The constructor stores the probability and lag window it was given.
    #[test]
    fn test_correlated_anomaly() {
        let correlated = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.80,
            (0, 30),
        )
        .with_description("Test correlation");

        assert!((correlated.probability - 0.80).abs() < 0.01);
        assert_eq!(correlated.lag_days_min, 0);
        assert_eq!(correlated.lag_days_max, 30);

        // Every sampled lag must stay inside the configured window.
        let mut rng = ChaCha8Rng::seed_from_u64(7);
        for _ in 0..50 {
            let lag = correlated.random_lag(&mut rng);
            assert!((0..=30).contains(&lag));
        }
    }

    /// Probability 1.0 always fires and probability 0.0 never does.
    #[test]
    fn test_correlated_anomaly_trigger() {
        let correlated = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            1.0, // Always triggers
            (0, 0),
        );

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        assert!(correlated.should_trigger(&mut rng));

        let never = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.0, // Never triggers
            (0, 0),
        );
        assert!(!never.should_trigger(&mut rng));
    }

    /// A pattern matches its own primary anomaly type and nothing else.
    #[test]
    fn test_co_occurrence_pattern() {
        let pattern = CoOccurrencePattern::new(
            "test_pattern",
            AnomalyType::Fraud(FraudType::FictitiousVendor),
        )
        .with_correlated(CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.80,
            (0, 30),
        ));

        assert!(pattern.matches(&AnomalyType::Fraud(FraudType::FictitiousVendor)));
        assert!(!pattern.matches(&AnomalyType::Fraud(FraudType::DuplicatePayment)));
    }

    /// The default manager ships with patterns, including one for fictitious vendors.
    #[test]
    fn test_anomaly_co_occurrence() {
        let co_occurrence = AnomalyCoOccurrence::new();
        assert!(!co_occurrence.patterns().is_empty());

        // Check that fraud_concealment pattern exists
        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(!patterns.is_empty());
    }

    /// Results for a fictitious vendor come only from `fraud_concealment`
    /// and respect its configured lag windows.
    #[test]
    fn test_get_correlated_anomalies() {
        let co_occurrence = AnomalyCoOccurrence::new();
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        let results = co_occurrence
            .get_correlated_anomalies(&AnomalyType::Fraud(FraudType::FictitiousVendor), &mut rng);

        // Only `fraud_concealment` has FictitiousVendor as its primary, and it
        // lists three correlated anomalies. How many fire depends on the RNG,
        // so only the upper bound is asserted.
        assert!(results.len() <= 3);

        for result in &results {
            assert_eq!(result.pattern_name, "fraud_concealment");
            // The widest window across the three correlations is 0..=60 days.
            assert!((0..=60).contains(&result.lag_days));
            // All three correlations keep the default same-entity targeting.
            assert!(result.same_entity);
        }
    }

    /// Disabling a pattern hides it from matching; re-enabling restores it.
    #[test]
    fn test_pattern_enable_disable() {
        let mut co_occurrence = AnomalyCoOccurrence::new();

        co_occurrence.set_pattern_enabled("fraud_concealment", false);

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(patterns.is_empty());

        co_occurrence.set_pattern_enabled("fraud_concealment", true);

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(!patterns.is_empty());
    }
}