datasynth_generators/anomaly/correlation/
co_occurrence.rs1use rand::Rng;
8use serde::{Deserialize, Serialize};
9
10use datasynth_core::models::AnomalyType;
11
/// A secondary anomaly that may be generated alongside a primary anomaly.
///
/// Each entry carries the chance that it fires (`probability`) and an
/// inclusive lag window in days (`lag_days_min..=lag_days_max`) from which
/// `random_lag` samples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelatedAnomaly {
    /// Type of the anomaly produced when this correlation triggers.
    pub anomaly_type: AnomalyType,
    /// Chance of triggering. `new` clamps this to `0.0..=1.0`; values set
    /// directly on the field or via deserialization are not clamped.
    pub probability: f64,
    /// Lower bound (inclusive) of the lag, in days.
    pub lag_days_min: i32,
    /// Upper bound (inclusive) of the lag, in days.
    pub lag_days_max: i32,
    /// Defaults to `true` in `new`.
    /// NOTE(review): presumably marks whether the correlated anomaly affects
    /// the same entity as the primary one — confirm against the consumer.
    pub same_entity: bool,
    /// Free-text description; empty unless set via `with_description`.
    pub description: String,
}
28
29impl CorrelatedAnomaly {
30 pub fn new(anomaly_type: AnomalyType, probability: f64, lag_range: (i32, i32)) -> Self {
32 Self {
33 anomaly_type,
34 probability: probability.clamp(0.0, 1.0),
35 lag_days_min: lag_range.0,
36 lag_days_max: lag_range.1,
37 same_entity: true,
38 description: String::new(),
39 }
40 }
41
42 pub fn with_same_entity(mut self, same: bool) -> Self {
44 self.same_entity = same;
45 self
46 }
47
48 pub fn with_description(mut self, description: impl Into<String>) -> Self {
50 self.description = description.into();
51 self
52 }
53
54 pub fn random_lag<R: Rng>(&self, rng: &mut R) -> i32 {
56 if self.lag_days_min == self.lag_days_max {
57 return self.lag_days_min;
58 }
59 rng.random_range(self.lag_days_min..=self.lag_days_max)
60 }
61
62 pub fn should_trigger<R: Rng>(&self, rng: &mut R) -> bool {
64 rng.random::<f64>() < self.probability
65 }
66}
67
/// A named rule linking one primary anomaly type to the anomalies that may
/// accompany it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoOccurrencePattern {
    /// Identifier of the pattern; `AnomalyCoOccurrence::set_pattern_enabled`
    /// looks patterns up by this name.
    pub name: String,
    /// Free-text description; empty unless set via `with_description`.
    pub description: String,
    /// Anomaly type that this pattern reacts to (compared in `matches`).
    pub primary: AnomalyType,
    /// Candidate follow-up anomalies, evaluated in insertion order.
    pub correlated: Vec<CorrelatedAnomaly>,
    /// Disabled patterns never match; defaults to `true` in `new`.
    pub enabled: bool,
}
82
83impl CoOccurrencePattern {
84 pub fn new(name: impl Into<String>, primary: AnomalyType) -> Self {
86 Self {
87 name: name.into(),
88 description: String::new(),
89 primary,
90 correlated: Vec::new(),
91 enabled: true,
92 }
93 }
94
95 pub fn with_description(mut self, description: impl Into<String>) -> Self {
97 self.description = description.into();
98 self
99 }
100
101 pub fn with_correlated(mut self, correlated: CorrelatedAnomaly) -> Self {
103 self.correlated.push(correlated);
104 self
105 }
106
107 pub fn with_enabled(mut self, enabled: bool) -> Self {
109 self.enabled = enabled;
110 self
111 }
112
113 pub fn matches(&self, anomaly_type: &AnomalyType) -> bool {
115 self.enabled && self.primary == *anomaly_type
116 }
117
118 pub fn get_triggered_correlations<R: Rng>(&self, rng: &mut R) -> Vec<&CorrelatedAnomaly> {
120 self.correlated
121 .iter()
122 .filter(|c| c.should_trigger(rng))
123 .collect()
124 }
125}
126
/// Registry of co-occurrence patterns, used to look up which correlated
/// anomalies may accompany a given anomaly type.
#[derive(Debug, Clone)]
pub struct AnomalyCoOccurrence {
    /// Registered patterns, in insertion order (built-in defaults first when
    /// constructed through `new`).
    patterns: Vec<CoOccurrencePattern>,
}
133
134impl Default for AnomalyCoOccurrence {
135 fn default() -> Self {
136 Self::new()
137 }
138}
139
140impl AnomalyCoOccurrence {
141 pub fn new() -> Self {
143 Self {
144 patterns: Self::default_patterns(),
145 }
146 }
147
148 fn default_patterns() -> Vec<CoOccurrencePattern> {
150 use datasynth_core::models::{ErrorType, FraudType, ProcessIssueType};
151
152 vec![
153 CoOccurrencePattern::new(
155 "fraud_concealment",
156 AnomalyType::Fraud(FraudType::FictitiousVendor),
157 )
158 .with_description("Fictitious vendor fraud typically involves document manipulation and approval bypass")
159 .with_correlated(
160 CorrelatedAnomaly::new(
161 AnomalyType::Fraud(FraudType::InvoiceManipulation),
162 0.80,
163 (0, 30),
164 )
165 .with_description("Document manipulation to support fictitious vendor"),
166 )
167 .with_correlated(
168 CorrelatedAnomaly::new(
169 AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
170 0.60,
171 (0, 15),
172 )
173 .with_description("Approval bypass to expedite fraudulent payments"),
174 )
175 .with_correlated(
176 CorrelatedAnomaly::new(
177 AnomalyType::Fraud(FraudType::DuplicatePayment),
178 0.30,
179 (15, 60),
180 )
181 .with_same_entity(true)
182 .with_description("Multiple payments to the fictitious vendor"),
183 ),
184
185 CoOccurrencePattern::new(
187 "error_cascade",
188 AnomalyType::Error(ErrorType::MisclassifiedAccount),
189 )
190 .with_description("Account misclassification leads to reconciliation issues and corrections")
191 .with_correlated(
192 CorrelatedAnomaly::new(
193 AnomalyType::Error(ErrorType::DuplicateEntry),
194 0.40,
195 (1, 10),
196 )
197 .with_description("Attempt to correct misclassification creates duplicate"),
198 )
199 .with_correlated(
200 CorrelatedAnomaly::new(
201 AnomalyType::Error(ErrorType::WrongPeriod),
202 0.30,
203 (5, 30),
204 )
205 .with_description("Correction posted to wrong period"),
206 ),
207
208 CoOccurrencePattern::new(
210 "process_breakdown",
211 AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
212 )
213 .with_description("Skipped approvals often accompanied by other control bypasses")
214 .with_correlated(
215 CorrelatedAnomaly::new(
216 AnomalyType::Fraud(FraudType::SplitTransaction),
217 0.50,
218 (0, 7),
219 )
220 .with_description("Transaction splitting to avoid threshold"),
221 )
222 .with_correlated(
223 CorrelatedAnomaly::new(
224 AnomalyType::ProcessIssue(ProcessIssueType::LatePosting),
225 0.40,
226 (0, 5),
227 )
228 .with_description("Late posting to avoid immediate detection"),
229 ),
230
231 CoOccurrencePattern::new(
233 "kickback_concealment",
234 AnomalyType::Fraud(FraudType::Kickback),
235 )
236 .with_description("Kickback schemes involve price inflation and approval manipulation")
237 .with_correlated(
238 CorrelatedAnomaly::new(
239 AnomalyType::Fraud(FraudType::InvoiceManipulation),
240 0.85,
241 (0, 14),
242 )
243 .with_description("Invoice price inflation"),
244 )
245 .with_correlated(
246 CorrelatedAnomaly::new(
247 AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation),
248 0.45,
249 (0, 30),
250 )
251 .with_description("SoD violation to approve own vendor"),
252 ),
253
254 CoOccurrencePattern::new(
256 "revenue_manipulation_concealment",
257 AnomalyType::Fraud(FraudType::RevenueManipulation),
258 )
259 .with_description("Revenue manipulation often involves expense deferral and reserve manipulation")
260 .with_correlated(
261 CorrelatedAnomaly::new(
262 AnomalyType::Fraud(FraudType::ImproperCapitalization),
263 0.60,
264 (0, 30),
265 )
266 .with_description("Capitalize expenses to boost current period income"),
267 )
268 .with_correlated(
269 CorrelatedAnomaly::new(
270 AnomalyType::Fraud(FraudType::ReserveManipulation),
271 0.50,
272 (30, 90),
273 )
274 .with_description("Release reserves to meet targets"),
275 ),
276 ]
277 }
278
279 pub fn add_pattern(&mut self, pattern: CoOccurrencePattern) {
281 self.patterns.push(pattern);
282 }
283
284 pub fn get_matching_patterns(&self, anomaly_type: &AnomalyType) -> Vec<&CoOccurrencePattern> {
286 self.patterns
287 .iter()
288 .filter(|p| p.matches(anomaly_type))
289 .collect()
290 }
291
292 pub fn get_correlated_anomalies<R: Rng>(
294 &self,
295 anomaly_type: &AnomalyType,
296 rng: &mut R,
297 ) -> Vec<CorrelatedAnomalyResult> {
298 let mut results = Vec::new();
299
300 for pattern in self.get_matching_patterns(anomaly_type) {
301 for correlated in pattern.get_triggered_correlations(rng) {
302 let lag = correlated.random_lag(rng);
303 results.push(CorrelatedAnomalyResult {
304 pattern_name: pattern.name.clone(),
305 anomaly_type: correlated.anomaly_type.clone(),
306 lag_days: lag,
307 same_entity: correlated.same_entity,
308 description: correlated.description.clone(),
309 });
310 }
311 }
312
313 results
314 }
315
316 pub fn patterns(&self) -> &[CoOccurrencePattern] {
318 &self.patterns
319 }
320
321 pub fn set_pattern_enabled(&mut self, name: &str, enabled: bool) {
323 for pattern in &mut self.patterns {
324 if pattern.name == name {
325 pattern.enabled = enabled;
326 break;
327 }
328 }
329 }
330}
331
/// One correlated anomaly that triggered, as returned by
/// `AnomalyCoOccurrence::get_correlated_anomalies`.
#[derive(Debug, Clone)]
pub struct CorrelatedAnomalyResult {
    /// Name of the pattern that produced this result.
    pub pattern_name: String,
    /// Type of the correlated anomaly.
    pub anomaly_type: AnomalyType,
    /// Lag in days sampled via `CorrelatedAnomaly::random_lag`.
    pub lag_days: i32,
    /// Copied from the correlated anomaly's `same_entity` flag.
    pub same_entity: bool,
    /// Copied from the correlated anomaly's description.
    pub description: String,
}
346
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::FraudType;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// `new` stores the probability and lag window and applies its defaults.
    #[test]
    fn test_correlated_anomaly() {
        let correlated = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.80,
            (0, 30),
        )
        .with_description("Test correlation");

        assert!((correlated.probability - 0.80).abs() < 0.01);
        assert_eq!(correlated.lag_days_min, 0);
        assert_eq!(correlated.lag_days_max, 30);
        assert!(correlated.same_entity);
        assert_eq!(correlated.description, "Test correlation");
    }

    /// Out-of-range probabilities are clamped by `new`.
    #[test]
    fn test_correlated_anomaly_probability_clamped() {
        let high = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            1.5,
            (0, 0),
        );
        let low = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            -0.5,
            (0, 0),
        );

        assert_eq!(high.probability, 1.0);
        assert_eq!(low.probability, 0.0);
    }

    /// Probability 1.0 always triggers, 0.0 never does, and a degenerate lag
    /// window yields its single value.
    #[test]
    fn test_correlated_anomaly_trigger() {
        let always = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            1.0,
            (0, 0),
        );
        let never = CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.0,
            (0, 0),
        );

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        assert!(always.should_trigger(&mut rng));
        assert!(!never.should_trigger(&mut rng));
        assert_eq!(always.random_lag(&mut rng), 0);
    }

    /// A pattern matches only its own primary anomaly type.
    #[test]
    fn test_co_occurrence_pattern() {
        let pattern = CoOccurrencePattern::new(
            "test_pattern",
            AnomalyType::Fraud(FraudType::FictitiousVendor),
        )
        .with_correlated(CorrelatedAnomaly::new(
            AnomalyType::Fraud(FraudType::InvoiceManipulation),
            0.80,
            (0, 30),
        ));

        assert!(pattern.matches(&AnomalyType::Fraud(FraudType::FictitiousVendor)));
        assert!(!pattern.matches(&AnomalyType::Fraud(FraudType::DuplicatePayment)));
    }

    /// The default registry is non-empty and knows the fictitious-vendor
    /// pattern.
    #[test]
    fn test_anomaly_co_occurrence() {
        let co_occurrence = AnomalyCoOccurrence::new();
        assert!(!co_occurrence.patterns().is_empty());

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(!patterns.is_empty());
    }

    /// Results for a fictitious vendor come only from `fraud_concealment`.
    #[test]
    fn test_get_correlated_anomalies() {
        let co_occurrence = AnomalyCoOccurrence::new();
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        let results = co_occurrence
            .get_correlated_anomalies(&AnomalyType::Fraud(FraudType::FictitiousVendor), &mut rng);

        // `fraud_concealment` is the only default pattern with this primary
        // type and it defines three correlations, so at most three can fire.
        assert!(results.len() <= 3);
        for result in &results {
            assert_eq!(result.pattern_name, "fraud_concealment");
            // Union of that pattern's lag windows: (0, 30), (0, 15), (15, 60).
            assert!((0..=60).contains(&result.lag_days));
            assert!(result.same_entity);
        }
    }

    /// Disabling a pattern hides it from matching; re-enabling restores it.
    #[test]
    fn test_pattern_enable_disable() {
        let mut co_occurrence = AnomalyCoOccurrence::new();

        co_occurrence.set_pattern_enabled("fraud_concealment", false);

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(patterns.is_empty());

        co_occurrence.set_pattern_enabled("fraud_concealment", true);

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(!patterns.is_empty());
    }
}