Skip to main content

datasynth_generators/fraud/
red_flags.rs

//! Red flag generation with correlation probabilities.
//!
//! This module generates fraud indicators (red flags) with appropriate
//! correlation probabilities for both fraudulent and legitimate transactions.
5
6use rand::Rng;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Strength of a red flag indicator.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum RedFlagStrength {
13    /// Strong correlation with fraud (>60% fraud probability).
14    Strong,
15    /// Moderate correlation (30-60% fraud probability).
16    Moderate,
17    /// Weak correlation (<30% fraud probability).
18    Weak,
19}
20
21impl RedFlagStrength {
22    /// Returns the fraud probability range for this strength.
23    pub fn fraud_probability_range(&self) -> (f64, f64) {
24        match self {
25            RedFlagStrength::Strong => (0.60, 0.90),
26            RedFlagStrength::Moderate => (0.30, 0.60),
27            RedFlagStrength::Weak => (0.10, 0.30),
28        }
29    }
30}
31
32/// Category of red flag.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
34pub enum RedFlagCategory {
35    /// Vendor-related flags.
36    Vendor,
37    /// Transaction pattern flags.
38    Transaction,
39    /// Employee behavior flags.
40    Employee,
41    /// Document-related flags.
42    Document,
43    /// Timing-related flags.
44    Timing,
45    /// Account-related flags.
46    Account,
47}
48
49/// A red flag pattern definition.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct RedFlagPattern {
52    /// Unique name of the pattern.
53    pub name: String,
54    /// Human-readable description.
55    pub description: String,
56    /// Category of the flag.
57    pub category: RedFlagCategory,
58    /// Strength of the flag.
59    pub strength: RedFlagStrength,
60    /// Base probability that this flag indicates fraud.
61    pub fraud_probability: f64,
62    /// Probability of flag appearing when fraud is present: P(flag | fraud).
63    pub inject_with_fraud: f64,
64    /// Probability of flag appearing in legitimate transactions: P(flag | not fraud).
65    pub inject_without_fraud: f64,
66    /// Detection methods effective for this flag.
67    pub detection_methods: Vec<String>,
68    /// Related fraud schemes.
69    pub related_schemes: Vec<String>,
70}
71
72impl RedFlagPattern {
73    /// Creates a new red flag pattern.
74    pub fn new(
75        name: impl Into<String>,
76        description: impl Into<String>,
77        category: RedFlagCategory,
78        strength: RedFlagStrength,
79        fraud_probability: f64,
80        inject_with_fraud: f64,
81        inject_without_fraud: f64,
82    ) -> Self {
83        Self {
84            name: name.into(),
85            description: description.into(),
86            category,
87            strength,
88            fraud_probability,
89            inject_with_fraud,
90            inject_without_fraud,
91            detection_methods: Vec::new(),
92            related_schemes: Vec::new(),
93        }
94    }
95
96    /// Adds detection methods.
97    pub fn with_detection_methods(mut self, methods: Vec<impl Into<String>>) -> Self {
98        self.detection_methods = methods.into_iter().map(Into::into).collect();
99        self
100    }
101
102    /// Adds related fraud schemes.
103    pub fn with_related_schemes(mut self, schemes: Vec<impl Into<String>>) -> Self {
104        self.related_schemes = schemes.into_iter().map(Into::into).collect();
105        self
106    }
107
108    /// Calculates the lift (how much more likely fraud is when flag is present).
109    pub fn lift(&self) -> f64 {
110        if self.inject_without_fraud > 0.0 {
111            self.inject_with_fraud / self.inject_without_fraud
112        } else {
113            f64::INFINITY
114        }
115    }
116}
117
118/// An instantiated red flag on a specific transaction.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RedFlag {
121    /// Reference to the pattern name.
122    pub pattern_name: String,
123    /// Document ID where flag was detected.
124    pub document_id: String,
125    /// Category of the flag.
126    pub category: RedFlagCategory,
127    /// Strength of the flag.
128    pub strength: RedFlagStrength,
129    /// Specific details about the flag instance.
130    pub details: HashMap<String, String>,
131    /// Whether this flag is actually associated with fraud.
132    pub is_fraudulent: bool,
133    /// Confidence score (0.0-1.0).
134    pub confidence: f64,
135}
136
137impl RedFlag {
138    /// Creates a new red flag instance.
139    pub fn new(
140        pattern_name: impl Into<String>,
141        document_id: impl Into<String>,
142        category: RedFlagCategory,
143        strength: RedFlagStrength,
144        is_fraudulent: bool,
145    ) -> Self {
146        Self {
147            pattern_name: pattern_name.into(),
148            document_id: document_id.into(),
149            category,
150            strength,
151            details: HashMap::new(),
152            is_fraudulent,
153            confidence: 1.0,
154        }
155    }
156
157    /// Adds a detail to the flag.
158    pub fn with_detail(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
159        self.details.insert(key.into(), value.into());
160        self
161    }
162
163    /// Sets the confidence score.
164    pub fn with_confidence(mut self, confidence: f64) -> Self {
165        self.confidence = confidence.clamp(0.0, 1.0);
166        self
167    }
168}
169
170/// Generator for red flags.
171#[derive(Debug, Clone)]
172pub struct RedFlagGenerator {
173    /// Strong red flag patterns.
174    pub strong_flags: Vec<RedFlagPattern>,
175    /// Moderate red flag patterns.
176    pub moderate_flags: Vec<RedFlagPattern>,
177    /// Weak red flag patterns.
178    pub weak_flags: Vec<RedFlagPattern>,
179}
180
181impl Default for RedFlagGenerator {
182    fn default() -> Self {
183        Self::new()
184    }
185}
186
187impl RedFlagGenerator {
188    /// Creates a new red flag generator with default patterns.
189    pub fn new() -> Self {
190        Self {
191            strong_flags: Self::default_strong_flags(),
192            moderate_flags: Self::default_moderate_flags(),
193            weak_flags: Self::default_weak_flags(),
194        }
195    }
196
197    /// Returns all patterns.
198    pub fn all_patterns(&self) -> Vec<&RedFlagPattern> {
199        let mut patterns: Vec<&RedFlagPattern> = Vec::new();
200        patterns.extend(self.strong_flags.iter());
201        patterns.extend(self.moderate_flags.iter());
202        patterns.extend(self.weak_flags.iter());
203        patterns
204    }
205
206    /// Generates red flags for a transaction.
207    pub fn inject_flags<R: Rng>(
208        &self,
209        document_id: &str,
210        is_fraud: bool,
211        rng: &mut R,
212    ) -> Vec<RedFlag> {
213        let mut flags = Vec::new();
214
215        // Process strong flags
216        for pattern in &self.strong_flags {
217            let prob = if is_fraud {
218                pattern.inject_with_fraud
219            } else {
220                pattern.inject_without_fraud
221            };
222            if rng.gen::<f64>() < prob {
223                flags.push(self.create_flag(document_id, pattern, is_fraud));
224            }
225        }
226
227        // Process moderate flags
228        for pattern in &self.moderate_flags {
229            let prob = if is_fraud {
230                pattern.inject_with_fraud
231            } else {
232                pattern.inject_without_fraud
233            };
234            if rng.gen::<f64>() < prob {
235                flags.push(self.create_flag(document_id, pattern, is_fraud));
236            }
237        }
238
239        // Process weak flags
240        for pattern in &self.weak_flags {
241            let prob = if is_fraud {
242                pattern.inject_with_fraud
243            } else {
244                pattern.inject_without_fraud
245            };
246            if rng.gen::<f64>() < prob {
247                flags.push(self.create_flag(document_id, pattern, is_fraud));
248            }
249        }
250
251        flags
252    }
253
254    /// Creates a red flag instance from a pattern.
255    fn create_flag(&self, document_id: &str, pattern: &RedFlagPattern, is_fraud: bool) -> RedFlag {
256        RedFlag::new(
257            &pattern.name,
258            document_id,
259            pattern.category,
260            pattern.strength,
261            is_fraud,
262        )
263        .with_confidence(pattern.fraud_probability)
264    }
265
266    /// Adds a custom pattern.
267    pub fn add_pattern(&mut self, pattern: RedFlagPattern) {
268        match pattern.strength {
269            RedFlagStrength::Strong => self.strong_flags.push(pattern),
270            RedFlagStrength::Moderate => self.moderate_flags.push(pattern),
271            RedFlagStrength::Weak => self.weak_flags.push(pattern),
272        }
273    }
274
275    /// Default strong red flag patterns.
276    fn default_strong_flags() -> Vec<RedFlagPattern> {
277        vec![
278            RedFlagPattern::new(
279                "matched_address_vendor_employee",
280                "Vendor address matches an employee's home address",
281                RedFlagCategory::Vendor,
282                RedFlagStrength::Strong,
283                0.85,
284                0.90,
285                0.001,
286            )
287            .with_related_schemes(vec!["shell_company", "fictitious_vendor"]),
288            RedFlagPattern::new(
289                "sequential_check_numbers_same_vendor",
290                "Sequential check numbers paid to the same vendor",
291                RedFlagCategory::Transaction,
292                RedFlagStrength::Strong,
293                0.70,
294                0.80,
295                0.01,
296            )
297            .with_related_schemes(vec!["duplicate_payment", "check_tampering"]),
298            RedFlagPattern::new(
299                "po_box_only_vendor",
300                "Vendor has only PO Box address, no physical address",
301                RedFlagCategory::Vendor,
302                RedFlagStrength::Strong,
303                0.60,
304                0.75,
305                0.02,
306            )
307            .with_related_schemes(vec!["fictitious_vendor", "shell_company"]),
308            RedFlagPattern::new(
309                "vendor_bank_matches_employee",
310                "Vendor bank account matches employee's account",
311                RedFlagCategory::Vendor,
312                RedFlagStrength::Strong,
313                0.90,
314                0.95,
315                0.0005,
316            )
317            .with_related_schemes(vec!["fictitious_vendor", "personal_purchases"]),
318            RedFlagPattern::new(
319                "approver_processor_same_person",
320                "Same person created and approved the transaction",
321                RedFlagCategory::Employee,
322                RedFlagStrength::Strong,
323                0.65,
324                0.85,
325                0.015,
326            )
327            .with_related_schemes(vec!["self_approval", "segregation_violation"]),
328        ]
329    }
330
331    /// Default moderate red flag patterns.
332    fn default_moderate_flags() -> Vec<RedFlagPattern> {
333        vec![
334            RedFlagPattern::new(
335                "vendor_no_physical_address",
336                "Vendor has no verified physical address on file",
337                RedFlagCategory::Vendor,
338                RedFlagStrength::Moderate,
339                0.40,
340                0.60,
341                0.05,
342            ),
343            RedFlagPattern::new(
344                "amount_just_below_threshold",
345                "Amount is just below approval threshold",
346                RedFlagCategory::Transaction,
347                RedFlagStrength::Moderate,
348                0.35,
349                0.70,
350                0.10,
351            )
352            .with_related_schemes(vec!["threshold_avoidance", "split_transaction"]),
353            RedFlagPattern::new(
354                "unusual_vendor_payment_pattern",
355                "Payment pattern to vendor differs from historical norm",
356                RedFlagCategory::Vendor,
357                RedFlagStrength::Moderate,
358                0.30,
359                0.55,
360                0.08,
361            ),
362            RedFlagPattern::new(
363                "new_vendor_large_first_payment",
364                "New vendor receives unusually large first payment",
365                RedFlagCategory::Vendor,
366                RedFlagStrength::Moderate,
367                0.40,
368                0.65,
369                0.06,
370            )
371            .with_related_schemes(vec!["shell_company", "kickback"]),
372            RedFlagPattern::new(
373                "missing_supporting_documentation",
374                "Transaction lacks required supporting documentation",
375                RedFlagCategory::Document,
376                RedFlagStrength::Moderate,
377                0.35,
378                0.60,
379                0.08,
380            ),
381            RedFlagPattern::new(
382                "employee_vacation_fraud_pattern",
383                "Suspicious transactions only when specific employee present",
384                RedFlagCategory::Employee,
385                RedFlagStrength::Moderate,
386                0.45,
387                0.70,
388                0.05,
389            ),
390            RedFlagPattern::new(
391                "dormant_vendor_reactivation",
392                "Previously dormant vendor suddenly receives payments",
393                RedFlagCategory::Vendor,
394                RedFlagStrength::Moderate,
395                0.35,
396                0.50,
397                0.07,
398            ),
399            RedFlagPattern::new(
400                "invoice_without_purchase_order",
401                "Invoice paid without corresponding purchase order",
402                RedFlagCategory::Document,
403                RedFlagStrength::Moderate,
404                0.30,
405                0.55,
406                0.12,
407            ),
408        ]
409    }
410
411    /// Default weak red flag patterns.
412    fn default_weak_flags() -> Vec<RedFlagPattern> {
413        vec![
414            RedFlagPattern::new(
415                "round_dollar_amount",
416                "Transaction amount is a round number",
417                RedFlagCategory::Transaction,
418                RedFlagStrength::Weak,
419                0.15,
420                0.40,
421                0.20,
422            ),
423            RedFlagPattern::new(
424                "month_end_timing",
425                "Transaction posted at month/quarter/year end",
426                RedFlagCategory::Timing,
427                RedFlagStrength::Weak,
428                0.10,
429                0.50,
430                0.30,
431            ),
432            RedFlagPattern::new(
433                "benford_first_digit_deviation",
434                "First digit distribution deviates from Benford's Law",
435                RedFlagCategory::Transaction,
436                RedFlagStrength::Weak,
437                0.12,
438                0.35,
439                0.15,
440            ),
441            RedFlagPattern::new(
442                "after_hours_posting",
443                "Transaction posted outside normal business hours",
444                RedFlagCategory::Timing,
445                RedFlagStrength::Weak,
446                0.15,
447                0.45,
448                0.18,
449            ),
450            RedFlagPattern::new(
451                "unusual_account_combination",
452                "Debit/credit account combination is unusual",
453                RedFlagCategory::Account,
454                RedFlagStrength::Weak,
455                0.20,
456                0.40,
457                0.12,
458            ),
459            RedFlagPattern::new(
460                "repeat_amount_pattern",
461                "Same exact amount appears multiple times",
462                RedFlagCategory::Transaction,
463                RedFlagStrength::Weak,
464                0.18,
465                0.45,
466                0.15,
467            ),
468            RedFlagPattern::new(
469                "weekend_transaction",
470                "Transaction recorded on weekend",
471                RedFlagCategory::Timing,
472                RedFlagStrength::Weak,
473                0.12,
474                0.35,
475                0.15,
476            ),
477            RedFlagPattern::new(
478                "vague_description",
479                "Transaction description is vague or missing",
480                RedFlagCategory::Document,
481                RedFlagStrength::Weak,
482                0.15,
483                0.40,
484                0.18,
485            ),
486        ]
487    }
488}
489
490/// Statistics about generated red flags.
491#[derive(Debug, Clone, Default, Serialize, Deserialize)]
492pub struct RedFlagStatistics {
493    /// Total flags generated.
494    pub total_flags: usize,
495    /// Flags on fraudulent transactions.
496    pub flags_with_fraud: usize,
497    /// Flags on legitimate transactions (false positives).
498    pub flags_without_fraud: usize,
499    /// Breakdown by strength.
500    pub by_strength: HashMap<String, usize>,
501    /// Breakdown by category.
502    pub by_category: HashMap<String, usize>,
503    /// Breakdown by pattern name.
504    pub by_pattern: HashMap<String, usize>,
505}
506
507impl RedFlagStatistics {
508    /// Creates statistics from a list of flags.
509    #[allow(clippy::field_reassign_with_default)]
510    pub fn from_flags(flags: &[RedFlag]) -> Self {
511        let mut stats = Self::default();
512        stats.total_flags = flags.len();
513
514        for flag in flags {
515            if flag.is_fraudulent {
516                stats.flags_with_fraud += 1;
517            } else {
518                stats.flags_without_fraud += 1;
519            }
520
521            *stats
522                .by_strength
523                .entry(format!("{:?}", flag.strength))
524                .or_insert(0) += 1;
525
526            *stats
527                .by_category
528                .entry(format!("{:?}", flag.category))
529                .or_insert(0) += 1;
530
531            *stats
532                .by_pattern
533                .entry(flag.pattern_name.clone())
534                .or_insert(0) += 1;
535        }
536
537        stats
538    }
539
540    /// Returns the precision (true positive rate among flagged transactions).
541    pub fn precision(&self) -> f64 {
542        if self.total_flags > 0 {
543            self.flags_with_fraud as f64 / self.total_flags as f64
544        } else {
545            0.0
546        }
547    }
548}
549
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// A built pattern keeps its name and strength and reports the expected lift.
    #[test]
    fn test_red_flag_pattern() {
        let built = RedFlagPattern::new(
            "test_pattern",
            "Test pattern description",
            RedFlagCategory::Vendor,
            RedFlagStrength::Strong,
            0.80,
            0.90,
            0.05,
        )
        .with_related_schemes(vec!["shell_company"]);

        assert_eq!(built.name, "test_pattern");
        assert_eq!(built.strength, RedFlagStrength::Strong);

        // 0.90 / 0.05 = 18
        let lift_error = (built.lift() - 18.0).abs();
        assert!(lift_error < 0.01);
    }

    /// Builder methods on a flag store the detail and the confidence.
    #[test]
    fn test_red_flag() {
        let flag = RedFlag::new(
            "matched_address",
            "INV001",
            RedFlagCategory::Vendor,
            RedFlagStrength::Strong,
            true,
        )
        .with_detail("vendor_id", "V001")
        .with_confidence(0.85);

        assert!(flag.is_fraudulent);
        assert_eq!(flag.document_id, "INV001");
        assert_eq!(flag.confidence, 0.85);
        assert_eq!(
            flag.details.get("vendor_id").map(String::as_str),
            Some("V001")
        );
    }

    /// The default generator has patterns in every tier and more than 15 overall.
    #[test]
    fn test_red_flag_generator() {
        let generator = RedFlagGenerator::new();

        let tiers = [
            &generator.strong_flags,
            &generator.moderate_flags,
            &generator.weak_flags,
        ];
        assert!(tiers.iter().all(|tier| !tier.is_empty()));

        assert!(generator.all_patterns().len() > 15);
    }

    /// With identical seeds, fraudulent documents collect more flags than legitimate ones.
    #[test]
    fn test_inject_flags_fraud() {
        let generator = RedFlagGenerator::new();

        // Counts the flags produced for 100 documents from a freshly seeded RNG.
        let count_flags = |is_fraud: bool| -> usize {
            let mut rng = ChaCha8Rng::seed_from_u64(42);
            let mut produced = 0;
            for i in 0..100 {
                let doc_id = format!("DOC{:03}", i);
                produced += generator.inject_flags(&doc_id, is_fraud, &mut rng).len();
            }
            produced
        };

        let fraud_total = count_flags(true);
        let legit_total = count_flags(false);

        // Fraud should generate more flags on average
        assert!(fraud_total > legit_total);
    }

    /// Statistics count totals, the fraud split, and the resulting precision.
    #[test]
    fn test_red_flag_statistics() {
        let specs = vec![
            (
                "pattern1",
                "DOC1",
                RedFlagCategory::Vendor,
                RedFlagStrength::Strong,
                true,
            ),
            (
                "pattern2",
                "DOC2",
                RedFlagCategory::Transaction,
                RedFlagStrength::Moderate,
                true,
            ),
            (
                "pattern3",
                "DOC3",
                RedFlagCategory::Timing,
                RedFlagStrength::Weak,
                false,
            ),
        ];
        let flags: Vec<RedFlag> = specs
            .into_iter()
            .map(|(pattern, doc, category, strength, fraudulent)| {
                RedFlag::new(pattern, doc, category, strength, fraudulent)
            })
            .collect();

        let stats = RedFlagStatistics::from_flags(&flags);

        assert_eq!(stats.total_flags, 3);
        assert_eq!(stats.flags_with_fraud, 2);
        assert_eq!(stats.flags_without_fraud, 1);

        let precision_error = (stats.precision() - 0.666).abs();
        assert!(precision_error < 0.01);
    }

    /// Default patterns respect the probability expectations of their tier.
    #[test]
    fn test_default_patterns_have_correct_properties() {
        let generator = RedFlagGenerator::new();

        // Strong flags should have high fraud probability
        for strong in &generator.strong_flags {
            assert!(strong.fraud_probability >= 0.60);
            assert!(strong.inject_without_fraud < strong.inject_with_fraud);
        }

        // Weak flags should have low fraud probability
        for weak in &generator.weak_flags {
            assert!(weak.fraud_probability < 0.30);
        }
    }

    /// A custom strong pattern lands in the strong tier.
    #[test]
    fn test_add_custom_pattern() {
        let mut generator = RedFlagGenerator::new();
        let strong_before = generator.strong_flags.len();

        let custom = RedFlagPattern::new(
            "custom_pattern",
            "Custom test pattern",
            RedFlagCategory::Account,
            RedFlagStrength::Strong,
            0.75,
            0.85,
            0.03,
        );
        generator.add_pattern(custom);

        assert_eq!(generator.strong_flags.len(), strong_before + 1);
    }
}