datasynth_generators/anomaly/correlation/
co_occurrence.rs1use rand::Rng;
8use serde::{Deserialize, Serialize};
9
10use datasynth_core::models::AnomalyType;
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct CorrelatedAnomaly {
15 pub anomaly_type: AnomalyType,
17 pub probability: f64,
19 pub lag_days_min: i32,
21 pub lag_days_max: i32,
23 pub same_entity: bool,
25 pub description: String,
27}
28
29impl CorrelatedAnomaly {
30 pub fn new(anomaly_type: AnomalyType, probability: f64, lag_range: (i32, i32)) -> Self {
32 Self {
33 anomaly_type,
34 probability: probability.clamp(0.0, 1.0),
35 lag_days_min: lag_range.0,
36 lag_days_max: lag_range.1,
37 same_entity: true,
38 description: String::new(),
39 }
40 }
41
42 pub fn with_same_entity(mut self, same: bool) -> Self {
44 self.same_entity = same;
45 self
46 }
47
48 pub fn with_description(mut self, description: impl Into<String>) -> Self {
50 self.description = description.into();
51 self
52 }
53
54 pub fn random_lag<R: Rng>(&self, rng: &mut R) -> i32 {
56 if self.lag_days_min == self.lag_days_max {
57 return self.lag_days_min;
58 }
59 rng.gen_range(self.lag_days_min..=self.lag_days_max)
60 }
61
62 pub fn should_trigger<R: Rng>(&self, rng: &mut R) -> bool {
64 rng.gen::<f64>() < self.probability
65 }
66}
67
68#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct CoOccurrencePattern {
71 pub name: String,
73 pub description: String,
75 pub primary: AnomalyType,
77 pub correlated: Vec<CorrelatedAnomaly>,
79 pub enabled: bool,
81}
82
83impl CoOccurrencePattern {
84 pub fn new(name: impl Into<String>, primary: AnomalyType) -> Self {
86 Self {
87 name: name.into(),
88 description: String::new(),
89 primary,
90 correlated: Vec::new(),
91 enabled: true,
92 }
93 }
94
95 pub fn with_description(mut self, description: impl Into<String>) -> Self {
97 self.description = description.into();
98 self
99 }
100
101 pub fn with_correlated(mut self, correlated: CorrelatedAnomaly) -> Self {
103 self.correlated.push(correlated);
104 self
105 }
106
107 pub fn with_enabled(mut self, enabled: bool) -> Self {
109 self.enabled = enabled;
110 self
111 }
112
113 pub fn matches(&self, anomaly_type: &AnomalyType) -> bool {
115 self.enabled && self.primary == *anomaly_type
116 }
117
118 pub fn get_triggered_correlations<R: Rng>(&self, rng: &mut R) -> Vec<&CorrelatedAnomaly> {
120 self.correlated
121 .iter()
122 .filter(|c| c.should_trigger(rng))
123 .collect()
124 }
125}
126
127#[derive(Debug, Clone)]
129pub struct AnomalyCoOccurrence {
130 patterns: Vec<CoOccurrencePattern>,
132}
133
134impl Default for AnomalyCoOccurrence {
135 fn default() -> Self {
136 Self::new()
137 }
138}
139
140impl AnomalyCoOccurrence {
141 pub fn new() -> Self {
143 Self {
144 patterns: Self::default_patterns(),
145 }
146 }
147
148 fn default_patterns() -> Vec<CoOccurrencePattern> {
150 use datasynth_core::models::{ErrorType, FraudType, ProcessIssueType};
151
152 vec![
153 CoOccurrencePattern::new(
155 "fraud_concealment",
156 AnomalyType::Fraud(FraudType::FictitiousVendor),
157 )
158 .with_description("Fictitious vendor fraud typically involves document manipulation and approval bypass")
159 .with_correlated(
160 CorrelatedAnomaly::new(
161 AnomalyType::Fraud(FraudType::InvoiceManipulation),
162 0.80,
163 (0, 30),
164 )
165 .with_description("Document manipulation to support fictitious vendor"),
166 )
167 .with_correlated(
168 CorrelatedAnomaly::new(
169 AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
170 0.60,
171 (0, 15),
172 )
173 .with_description("Approval bypass to expedite fraudulent payments"),
174 )
175 .with_correlated(
176 CorrelatedAnomaly::new(
177 AnomalyType::Fraud(FraudType::DuplicatePayment),
178 0.30,
179 (15, 60),
180 )
181 .with_same_entity(true)
182 .with_description("Multiple payments to the fictitious vendor"),
183 ),
184
185 CoOccurrencePattern::new(
187 "error_cascade",
188 AnomalyType::Error(ErrorType::MisclassifiedAccount),
189 )
190 .with_description("Account misclassification leads to reconciliation issues and corrections")
191 .with_correlated(
192 CorrelatedAnomaly::new(
193 AnomalyType::Error(ErrorType::DuplicateEntry),
194 0.40,
195 (1, 10),
196 )
197 .with_description("Attempt to correct misclassification creates duplicate"),
198 )
199 .with_correlated(
200 CorrelatedAnomaly::new(
201 AnomalyType::Error(ErrorType::WrongPeriod),
202 0.30,
203 (5, 30),
204 )
205 .with_description("Correction posted to wrong period"),
206 ),
207
208 CoOccurrencePattern::new(
210 "process_breakdown",
211 AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval),
212 )
213 .with_description("Skipped approvals often accompanied by other control bypasses")
214 .with_correlated(
215 CorrelatedAnomaly::new(
216 AnomalyType::Fraud(FraudType::SplitTransaction),
217 0.50,
218 (0, 7),
219 )
220 .with_description("Transaction splitting to avoid threshold"),
221 )
222 .with_correlated(
223 CorrelatedAnomaly::new(
224 AnomalyType::ProcessIssue(ProcessIssueType::LatePosting),
225 0.40,
226 (0, 5),
227 )
228 .with_description("Late posting to avoid immediate detection"),
229 ),
230
231 CoOccurrencePattern::new(
233 "kickback_concealment",
234 AnomalyType::Fraud(FraudType::Kickback),
235 )
236 .with_description("Kickback schemes involve price inflation and approval manipulation")
237 .with_correlated(
238 CorrelatedAnomaly::new(
239 AnomalyType::Fraud(FraudType::InvoiceManipulation),
240 0.85,
241 (0, 14),
242 )
243 .with_description("Invoice price inflation"),
244 )
245 .with_correlated(
246 CorrelatedAnomaly::new(
247 AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation),
248 0.45,
249 (0, 30),
250 )
251 .with_description("SoD violation to approve own vendor"),
252 ),
253
254 CoOccurrencePattern::new(
256 "revenue_manipulation_concealment",
257 AnomalyType::Fraud(FraudType::RevenueManipulation),
258 )
259 .with_description("Revenue manipulation often involves expense deferral and reserve manipulation")
260 .with_correlated(
261 CorrelatedAnomaly::new(
262 AnomalyType::Fraud(FraudType::ImproperCapitalization),
263 0.60,
264 (0, 30),
265 )
266 .with_description("Capitalize expenses to boost current period income"),
267 )
268 .with_correlated(
269 CorrelatedAnomaly::new(
270 AnomalyType::Fraud(FraudType::ReserveManipulation),
271 0.50,
272 (30, 90),
273 )
274 .with_description("Release reserves to meet targets"),
275 ),
276 ]
277 }
278
279 pub fn add_pattern(&mut self, pattern: CoOccurrencePattern) {
281 self.patterns.push(pattern);
282 }
283
284 pub fn get_matching_patterns(&self, anomaly_type: &AnomalyType) -> Vec<&CoOccurrencePattern> {
286 self.patterns
287 .iter()
288 .filter(|p| p.matches(anomaly_type))
289 .collect()
290 }
291
292 pub fn get_correlated_anomalies<R: Rng>(
294 &self,
295 anomaly_type: &AnomalyType,
296 rng: &mut R,
297 ) -> Vec<CorrelatedAnomalyResult> {
298 let mut results = Vec::new();
299
300 for pattern in self.get_matching_patterns(anomaly_type) {
301 for correlated in pattern.get_triggered_correlations(rng) {
302 let lag = correlated.random_lag(rng);
303 results.push(CorrelatedAnomalyResult {
304 pattern_name: pattern.name.clone(),
305 anomaly_type: correlated.anomaly_type.clone(),
306 lag_days: lag,
307 same_entity: correlated.same_entity,
308 description: correlated.description.clone(),
309 });
310 }
311 }
312
313 results
314 }
315
316 pub fn patterns(&self) -> &[CoOccurrencePattern] {
318 &self.patterns
319 }
320
321 pub fn set_pattern_enabled(&mut self, name: &str, enabled: bool) {
323 for pattern in &mut self.patterns {
324 if pattern.name == name {
325 pattern.enabled = enabled;
326 break;
327 }
328 }
329 }
330}
331
332#[derive(Debug, Clone)]
334pub struct CorrelatedAnomalyResult {
335 pub pattern_name: String,
337 pub anomaly_type: AnomalyType,
339 pub lag_days: i32,
341 pub same_entity: bool,
343 pub description: String,
345}
346
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::FraudType;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Convenience constructor for the anomaly type used across these tests.
    fn invoice_manipulation() -> AnomalyType {
        AnomalyType::Fraud(FraudType::InvoiceManipulation)
    }

    #[test]
    fn test_correlated_anomaly() {
        let correlated = CorrelatedAnomaly::new(invoice_manipulation(), 0.80, (0, 30))
            .with_description("Test correlation");

        assert!((correlated.probability - 0.80).abs() < 0.01);
        assert_eq!(correlated.lag_days_min, 0);
        assert_eq!(correlated.lag_days_max, 30);
        assert!(correlated.same_entity);
        assert_eq!(correlated.description, "Test correlation");
    }

    #[test]
    fn test_probability_is_clamped() {
        let too_high = CorrelatedAnomaly::new(invoice_manipulation(), 1.5, (0, 0));
        let too_low = CorrelatedAnomaly::new(invoice_manipulation(), -0.5, (0, 0));

        assert!((too_high.probability - 1.0).abs() < f64::EPSILON);
        assert!(too_low.probability.abs() < f64::EPSILON);
    }

    #[test]
    fn test_correlated_anomaly_trigger() {
        let correlated = CorrelatedAnomaly::new(invoice_manipulation(), 1.0, (0, 0));

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        assert!(correlated.should_trigger(&mut rng));
    }

    #[test]
    fn test_zero_probability_never_triggers() {
        let correlated = CorrelatedAnomaly::new(invoice_manipulation(), 0.0, (0, 0));

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        for _ in 0..100 {
            assert!(!correlated.should_trigger(&mut rng));
        }
    }

    #[test]
    fn test_random_lag_stays_in_window() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);

        let ranged = CorrelatedAnomaly::new(invoice_manipulation(), 1.0, (3, 9));
        for _ in 0..100 {
            let lag = ranged.random_lag(&mut rng);
            assert!((3..=9).contains(&lag));
        }

        let fixed = CorrelatedAnomaly::new(invoice_manipulation(), 1.0, (5, 5));
        assert_eq!(fixed.random_lag(&mut rng), 5);
    }

    #[test]
    fn test_co_occurrence_pattern() {
        let pattern = CoOccurrencePattern::new(
            "test_pattern",
            AnomalyType::Fraud(FraudType::FictitiousVendor),
        )
        .with_correlated(CorrelatedAnomaly::new(
            invoice_manipulation(),
            0.80,
            (0, 30),
        ));

        assert!(pattern.matches(&AnomalyType::Fraud(FraudType::FictitiousVendor)));
        assert!(!pattern.matches(&AnomalyType::Fraud(FraudType::DuplicatePayment)));
    }

    #[test]
    fn test_anomaly_co_occurrence() {
        let co_occurrence = AnomalyCoOccurrence::new();
        assert!(!co_occurrence.patterns().is_empty());

        let patterns =
            co_occurrence.get_matching_patterns(&AnomalyType::Fraud(FraudType::FictitiousVendor));
        assert!(!patterns.is_empty());
    }

    #[test]
    fn test_get_correlated_anomalies() {
        let co_occurrence = AnomalyCoOccurrence::new();
        let primary = AnomalyType::Fraud(FraudType::FictitiousVendor);

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let results = co_occurrence.get_correlated_anomalies(&primary, &mut rng);

        // Only the built-in "fraud_concealment" pattern has this primary type,
        // and it lists three correlations with lag windows inside 0..=60.
        assert!(results.len() <= 3);
        for result in &results {
            assert_eq!(result.pattern_name, "fraud_concealment");
            assert!((0..=60).contains(&result.lag_days));
            assert!(result.same_entity);
        }

        // The same seed must reproduce the same outcome.
        let mut replay_rng = ChaCha8Rng::seed_from_u64(42);
        let replay = co_occurrence.get_correlated_anomalies(&primary, &mut replay_rng);
        let lags: Vec<i32> = results.iter().map(|r| r.lag_days).collect();
        let replay_lags: Vec<i32> = replay.iter().map(|r| r.lag_days).collect();
        assert_eq!(lags, replay_lags);
    }

    #[test]
    fn test_pattern_enable_disable() {
        let mut co_occurrence = AnomalyCoOccurrence::new();
        let primary = AnomalyType::Fraud(FraudType::FictitiousVendor);

        co_occurrence.set_pattern_enabled("fraud_concealment", false);

        let patterns = co_occurrence.get_matching_patterns(&primary);
        assert!(patterns.is_empty());

        // A disabled pattern must not contribute any correlated anomalies.
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        assert!(co_occurrence
            .get_correlated_anomalies(&primary, &mut rng)
            .is_empty());

        co_occurrence.set_pattern_enabled("fraud_concealment", true);

        let patterns = co_occurrence.get_matching_patterns(&primary);
        assert!(!patterns.is_empty());
    }
}