Skip to main content

datasynth_generators/fraud/
red_flags.rs

1//! Red flag generation with correlation probabilities.
2//!
3//! This module generates fraud indicators (red flags) with appropriate
4//! correlation probabilities for both fraudulent and legitimate transactions.
5
6use rand::Rng;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10/// Strength of a red flag indicator.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12pub enum RedFlagStrength {
13    /// Strong correlation with fraud (>60% fraud probability).
14    Strong,
15    /// Moderate correlation (30-60% fraud probability).
16    Moderate,
17    /// Weak correlation (<30% fraud probability).
18    Weak,
19}
20
21impl RedFlagStrength {
22    /// Returns the fraud probability range for this strength.
23    pub fn fraud_probability_range(&self) -> (f64, f64) {
24        match self {
25            RedFlagStrength::Strong => (0.60, 0.90),
26            RedFlagStrength::Moderate => (0.30, 0.60),
27            RedFlagStrength::Weak => (0.10, 0.30),
28        }
29    }
30}
31
32/// Category of red flag.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
34pub enum RedFlagCategory {
35    /// Vendor-related flags.
36    Vendor,
37    /// Transaction pattern flags.
38    Transaction,
39    /// Employee behavior flags.
40    Employee,
41    /// Document-related flags.
42    Document,
43    /// Timing-related flags.
44    Timing,
45    /// Account-related flags.
46    Account,
47}
48
49/// A red flag pattern definition.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct RedFlagPattern {
52    /// Unique name of the pattern.
53    pub name: String,
54    /// Human-readable description.
55    pub description: String,
56    /// Category of the flag.
57    pub category: RedFlagCategory,
58    /// Strength of the flag.
59    pub strength: RedFlagStrength,
60    /// Base probability that this flag indicates fraud.
61    pub fraud_probability: f64,
62    /// Probability of flag appearing when fraud is present: P(flag | fraud).
63    pub inject_with_fraud: f64,
64    /// Probability of flag appearing in legitimate transactions: P(flag | not fraud).
65    pub inject_without_fraud: f64,
66    /// Detection methods effective for this flag.
67    pub detection_methods: Vec<String>,
68    /// Related fraud schemes.
69    pub related_schemes: Vec<String>,
70}
71
72impl RedFlagPattern {
73    /// Creates a new red flag pattern.
74    pub fn new(
75        name: impl Into<String>,
76        description: impl Into<String>,
77        category: RedFlagCategory,
78        strength: RedFlagStrength,
79        fraud_probability: f64,
80        inject_with_fraud: f64,
81        inject_without_fraud: f64,
82    ) -> Self {
83        Self {
84            name: name.into(),
85            description: description.into(),
86            category,
87            strength,
88            fraud_probability,
89            inject_with_fraud,
90            inject_without_fraud,
91            detection_methods: Vec::new(),
92            related_schemes: Vec::new(),
93        }
94    }
95
96    /// Adds detection methods.
97    pub fn with_detection_methods(mut self, methods: Vec<impl Into<String>>) -> Self {
98        self.detection_methods = methods.into_iter().map(Into::into).collect();
99        self
100    }
101
102    /// Adds related fraud schemes.
103    pub fn with_related_schemes(mut self, schemes: Vec<impl Into<String>>) -> Self {
104        self.related_schemes = schemes.into_iter().map(Into::into).collect();
105        self
106    }
107
108    /// Calculates the lift (how much more likely fraud is when flag is present).
109    pub fn lift(&self) -> f64 {
110        if self.inject_without_fraud > 0.0 {
111            self.inject_with_fraud / self.inject_without_fraud
112        } else {
113            f64::INFINITY
114        }
115    }
116}
117
118/// An instantiated red flag on a specific transaction.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RedFlag {
121    /// Reference to the pattern name.
122    pub pattern_name: String,
123    /// Document ID where flag was detected.
124    pub document_id: String,
125    /// Category of the flag.
126    pub category: RedFlagCategory,
127    /// Strength of the flag.
128    pub strength: RedFlagStrength,
129    /// Specific details about the flag instance.
130    pub details: HashMap<String, String>,
131    /// Whether this flag is actually associated with fraud.
132    pub is_fraudulent: bool,
133    /// Confidence score (0.0-1.0).
134    pub confidence: f64,
135}
136
137impl RedFlag {
138    /// Creates a new red flag instance.
139    pub fn new(
140        pattern_name: impl Into<String>,
141        document_id: impl Into<String>,
142        category: RedFlagCategory,
143        strength: RedFlagStrength,
144        is_fraudulent: bool,
145    ) -> Self {
146        Self {
147            pattern_name: pattern_name.into(),
148            document_id: document_id.into(),
149            category,
150            strength,
151            details: HashMap::new(),
152            is_fraudulent,
153            confidence: 1.0,
154        }
155    }
156
157    /// Adds a detail to the flag.
158    pub fn with_detail(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
159        self.details.insert(key.into(), value.into());
160        self
161    }
162
163    /// Sets the confidence score.
164    pub fn with_confidence(mut self, confidence: f64) -> Self {
165        self.confidence = confidence.clamp(0.0, 1.0);
166        self
167    }
168}
169
170/// Generator for red flags.
171#[derive(Debug, Clone)]
172pub struct RedFlagGenerator {
173    /// Strong red flag patterns.
174    pub strong_flags: Vec<RedFlagPattern>,
175    /// Moderate red flag patterns.
176    pub moderate_flags: Vec<RedFlagPattern>,
177    /// Weak red flag patterns.
178    pub weak_flags: Vec<RedFlagPattern>,
179}
180
181impl Default for RedFlagGenerator {
182    fn default() -> Self {
183        Self::new()
184    }
185}
186
187impl RedFlagGenerator {
188    /// Creates a new red flag generator with default patterns.
189    pub fn new() -> Self {
190        Self {
191            strong_flags: Self::default_strong_flags(),
192            moderate_flags: Self::default_moderate_flags(),
193            weak_flags: Self::default_weak_flags(),
194        }
195    }
196
197    /// Returns all patterns.
198    pub fn all_patterns(&self) -> Vec<&RedFlagPattern> {
199        let mut patterns: Vec<&RedFlagPattern> = Vec::new();
200        patterns.extend(self.strong_flags.iter());
201        patterns.extend(self.moderate_flags.iter());
202        patterns.extend(self.weak_flags.iter());
203        patterns
204    }
205
206    /// Generates red flags for a transaction.
207    pub fn inject_flags<R: Rng>(
208        &self,
209        document_id: &str,
210        is_fraud: bool,
211        rng: &mut R,
212    ) -> Vec<RedFlag> {
213        let mut flags = Vec::new();
214
215        // Process strong flags
216        for pattern in &self.strong_flags {
217            let prob = if is_fraud {
218                pattern.inject_with_fraud
219            } else {
220                pattern.inject_without_fraud
221            };
222            if rng.gen::<f64>() < prob {
223                flags.push(self.create_flag(document_id, pattern, is_fraud));
224            }
225        }
226
227        // Process moderate flags
228        for pattern in &self.moderate_flags {
229            let prob = if is_fraud {
230                pattern.inject_with_fraud
231            } else {
232                pattern.inject_without_fraud
233            };
234            if rng.gen::<f64>() < prob {
235                flags.push(self.create_flag(document_id, pattern, is_fraud));
236            }
237        }
238
239        // Process weak flags
240        for pattern in &self.weak_flags {
241            let prob = if is_fraud {
242                pattern.inject_with_fraud
243            } else {
244                pattern.inject_without_fraud
245            };
246            if rng.gen::<f64>() < prob {
247                flags.push(self.create_flag(document_id, pattern, is_fraud));
248            }
249        }
250
251        flags
252    }
253
254    /// Creates a red flag instance from a pattern.
255    fn create_flag(&self, document_id: &str, pattern: &RedFlagPattern, is_fraud: bool) -> RedFlag {
256        RedFlag::new(
257            &pattern.name,
258            document_id,
259            pattern.category,
260            pattern.strength,
261            is_fraud,
262        )
263        .with_confidence(pattern.fraud_probability)
264    }
265
266    /// Adds a custom pattern.
267    pub fn add_pattern(&mut self, pattern: RedFlagPattern) {
268        match pattern.strength {
269            RedFlagStrength::Strong => self.strong_flags.push(pattern),
270            RedFlagStrength::Moderate => self.moderate_flags.push(pattern),
271            RedFlagStrength::Weak => self.weak_flags.push(pattern),
272        }
273    }
274
275    /// Default strong red flag patterns.
276    fn default_strong_flags() -> Vec<RedFlagPattern> {
277        vec![
278            RedFlagPattern::new(
279                "matched_address_vendor_employee",
280                "Vendor address matches an employee's home address",
281                RedFlagCategory::Vendor,
282                RedFlagStrength::Strong,
283                0.85,
284                0.90,
285                0.001,
286            )
287            .with_related_schemes(vec!["shell_company", "fictitious_vendor"]),
288            RedFlagPattern::new(
289                "sequential_check_numbers_same_vendor",
290                "Sequential check numbers paid to the same vendor",
291                RedFlagCategory::Transaction,
292                RedFlagStrength::Strong,
293                0.70,
294                0.80,
295                0.01,
296            )
297            .with_related_schemes(vec!["duplicate_payment", "check_tampering"]),
298            RedFlagPattern::new(
299                "po_box_only_vendor",
300                "Vendor has only PO Box address, no physical address",
301                RedFlagCategory::Vendor,
302                RedFlagStrength::Strong,
303                0.60,
304                0.75,
305                0.02,
306            )
307            .with_related_schemes(vec!["fictitious_vendor", "shell_company"]),
308            RedFlagPattern::new(
309                "vendor_bank_matches_employee",
310                "Vendor bank account matches employee's account",
311                RedFlagCategory::Vendor,
312                RedFlagStrength::Strong,
313                0.90,
314                0.95,
315                0.0005,
316            )
317            .with_related_schemes(vec!["fictitious_vendor", "personal_purchases"]),
318            RedFlagPattern::new(
319                "approver_processor_same_person",
320                "Same person created and approved the transaction",
321                RedFlagCategory::Employee,
322                RedFlagStrength::Strong,
323                0.65,
324                0.85,
325                0.015,
326            )
327            .with_related_schemes(vec!["self_approval", "segregation_violation"]),
328        ]
329    }
330
331    /// Default moderate red flag patterns.
332    fn default_moderate_flags() -> Vec<RedFlagPattern> {
333        vec![
334            RedFlagPattern::new(
335                "vendor_no_physical_address",
336                "Vendor has no verified physical address on file",
337                RedFlagCategory::Vendor,
338                RedFlagStrength::Moderate,
339                0.40,
340                0.60,
341                0.05,
342            ),
343            RedFlagPattern::new(
344                "amount_just_below_threshold",
345                "Amount is just below approval threshold",
346                RedFlagCategory::Transaction,
347                RedFlagStrength::Moderate,
348                0.35,
349                0.70,
350                0.10,
351            )
352            .with_related_schemes(vec!["threshold_avoidance", "split_transaction"]),
353            RedFlagPattern::new(
354                "unusual_vendor_payment_pattern",
355                "Payment pattern to vendor differs from historical norm",
356                RedFlagCategory::Vendor,
357                RedFlagStrength::Moderate,
358                0.30,
359                0.55,
360                0.08,
361            ),
362            RedFlagPattern::new(
363                "new_vendor_large_first_payment",
364                "New vendor receives unusually large first payment",
365                RedFlagCategory::Vendor,
366                RedFlagStrength::Moderate,
367                0.40,
368                0.65,
369                0.06,
370            )
371            .with_related_schemes(vec!["shell_company", "kickback"]),
372            RedFlagPattern::new(
373                "missing_supporting_documentation",
374                "Transaction lacks required supporting documentation",
375                RedFlagCategory::Document,
376                RedFlagStrength::Moderate,
377                0.35,
378                0.60,
379                0.08,
380            ),
381            RedFlagPattern::new(
382                "employee_vacation_fraud_pattern",
383                "Suspicious transactions only when specific employee present",
384                RedFlagCategory::Employee,
385                RedFlagStrength::Moderate,
386                0.45,
387                0.70,
388                0.05,
389            ),
390            RedFlagPattern::new(
391                "dormant_vendor_reactivation",
392                "Previously dormant vendor suddenly receives payments",
393                RedFlagCategory::Vendor,
394                RedFlagStrength::Moderate,
395                0.35,
396                0.50,
397                0.07,
398            ),
399            RedFlagPattern::new(
400                "invoice_without_purchase_order",
401                "Invoice paid without corresponding purchase order",
402                RedFlagCategory::Document,
403                RedFlagStrength::Moderate,
404                0.30,
405                0.55,
406                0.12,
407            ),
408        ]
409    }
410
411    /// Default weak red flag patterns.
412    fn default_weak_flags() -> Vec<RedFlagPattern> {
413        vec![
414            RedFlagPattern::new(
415                "round_dollar_amount",
416                "Transaction amount is a round number",
417                RedFlagCategory::Transaction,
418                RedFlagStrength::Weak,
419                0.15,
420                0.40,
421                0.20,
422            ),
423            RedFlagPattern::new(
424                "month_end_timing",
425                "Transaction posted at month/quarter/year end",
426                RedFlagCategory::Timing,
427                RedFlagStrength::Weak,
428                0.10,
429                0.50,
430                0.30,
431            ),
432            RedFlagPattern::new(
433                "benford_first_digit_deviation",
434                "First digit distribution deviates from Benford's Law",
435                RedFlagCategory::Transaction,
436                RedFlagStrength::Weak,
437                0.12,
438                0.35,
439                0.15,
440            ),
441            RedFlagPattern::new(
442                "after_hours_posting",
443                "Transaction posted outside normal business hours",
444                RedFlagCategory::Timing,
445                RedFlagStrength::Weak,
446                0.15,
447                0.45,
448                0.18,
449            ),
450            RedFlagPattern::new(
451                "unusual_account_combination",
452                "Debit/credit account combination is unusual",
453                RedFlagCategory::Account,
454                RedFlagStrength::Weak,
455                0.20,
456                0.40,
457                0.12,
458            ),
459            RedFlagPattern::new(
460                "repeat_amount_pattern",
461                "Same exact amount appears multiple times",
462                RedFlagCategory::Transaction,
463                RedFlagStrength::Weak,
464                0.18,
465                0.45,
466                0.15,
467            ),
468            RedFlagPattern::new(
469                "weekend_transaction",
470                "Transaction recorded on weekend",
471                RedFlagCategory::Timing,
472                RedFlagStrength::Weak,
473                0.12,
474                0.35,
475                0.15,
476            ),
477            RedFlagPattern::new(
478                "vague_description",
479                "Transaction description is vague or missing",
480                RedFlagCategory::Document,
481                RedFlagStrength::Weak,
482                0.15,
483                0.40,
484                0.18,
485            ),
486        ]
487    }
488}
489
490/// Statistics about generated red flags.
491#[derive(Debug, Clone, Default, Serialize, Deserialize)]
492pub struct RedFlagStatistics {
493    /// Total flags generated.
494    pub total_flags: usize,
495    /// Flags on fraudulent transactions.
496    pub flags_with_fraud: usize,
497    /// Flags on legitimate transactions (false positives).
498    pub flags_without_fraud: usize,
499    /// Breakdown by strength.
500    pub by_strength: HashMap<String, usize>,
501    /// Breakdown by category.
502    pub by_category: HashMap<String, usize>,
503    /// Breakdown by pattern name.
504    pub by_pattern: HashMap<String, usize>,
505}
506
507impl RedFlagStatistics {
508    /// Creates statistics from a list of flags.
509    pub fn from_flags(flags: &[RedFlag]) -> Self {
510        let mut stats = Self {
511            total_flags: flags.len(),
512            ..Self::default()
513        };
514
515        for flag in flags {
516            if flag.is_fraudulent {
517                stats.flags_with_fraud += 1;
518            } else {
519                stats.flags_without_fraud += 1;
520            }
521
522            *stats
523                .by_strength
524                .entry(format!("{:?}", flag.strength))
525                .or_insert(0) += 1;
526
527            *stats
528                .by_category
529                .entry(format!("{:?}", flag.category))
530                .or_insert(0) += 1;
531
532            *stats
533                .by_pattern
534                .entry(flag.pattern_name.clone())
535                .or_insert(0) += 1;
536        }
537
538        stats
539    }
540
541    /// Returns the precision (true positive rate among flagged transactions).
542    pub fn precision(&self) -> f64 {
543        if self.total_flags > 0 {
544            self.flags_with_fraud as f64 / self.total_flags as f64
545        } else {
546            0.0
547        }
548    }
549}
550
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Runs the generator over 100 documents with a fixed seed and gathers every flag.
    fn collect_flags(generator: &RedFlagGenerator, is_fraud: bool) -> Vec<RedFlag> {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut flags = Vec::new();
        for i in 0..100 {
            let document_id = format!("DOC{:03}", i);
            flags.extend(generator.inject_flags(&document_id, is_fraud, &mut rng));
        }
        flags
    }

    /// Shorthand for a flag without details and with the default confidence.
    fn plain_flag(
        pattern: &str,
        document: &str,
        category: RedFlagCategory,
        strength: RedFlagStrength,
        is_fraudulent: bool,
    ) -> RedFlag {
        RedFlag::new(pattern, document, category, strength, is_fraudulent)
    }

    #[test]
    fn test_red_flag_pattern() {
        let base = RedFlagPattern::new(
            "test_pattern",
            "Test pattern description",
            RedFlagCategory::Vendor,
            RedFlagStrength::Strong,
            0.80,
            0.90,
            0.05,
        );
        let pattern = base.with_related_schemes(vec!["shell_company"]);

        assert_eq!(pattern.name, "test_pattern");
        assert_eq!(pattern.strength, RedFlagStrength::Strong);
        // 0.90 / 0.05 = 18
        let lift_error = (pattern.lift() - 18.0).abs();
        assert!(lift_error < 0.01);
    }

    #[test]
    fn test_red_flag() {
        let flag = plain_flag(
            "matched_address",
            "INV001",
            RedFlagCategory::Vendor,
            RedFlagStrength::Strong,
            true,
        )
        .with_detail("vendor_id", "V001")
        .with_confidence(0.85);

        assert_eq!(flag.document_id, "INV001");
        assert!(flag.is_fraudulent);
        assert_eq!(flag.confidence, 0.85);
        assert_eq!(
            flag.details.get("vendor_id").map(String::as_str),
            Some("V001")
        );
    }

    #[test]
    fn test_red_flag_generator() {
        let generator = RedFlagGenerator::new();

        assert!(!generator.strong_flags.is_empty());
        assert!(!generator.moderate_flags.is_empty());
        assert!(!generator.weak_flags.is_empty());

        let pattern_count = generator.all_patterns().len();
        assert!(pattern_count > 15);
    }

    #[test]
    fn test_inject_flags_fraud() {
        let generator = RedFlagGenerator::new();

        // Both runs start from the same seed so only `is_fraud` differs.
        let fraud_flags = collect_flags(&generator, true);
        let legit_flags = collect_flags(&generator, false);

        // Fraud should generate more flags on average
        assert!(fraud_flags.len() > legit_flags.len());
    }

    #[test]
    fn test_red_flag_statistics() {
        let flags = vec![
            plain_flag(
                "pattern1",
                "DOC1",
                RedFlagCategory::Vendor,
                RedFlagStrength::Strong,
                true,
            ),
            plain_flag(
                "pattern2",
                "DOC2",
                RedFlagCategory::Transaction,
                RedFlagStrength::Moderate,
                true,
            ),
            plain_flag(
                "pattern3",
                "DOC3",
                RedFlagCategory::Timing,
                RedFlagStrength::Weak,
                false,
            ),
        ];

        let stats = RedFlagStatistics::from_flags(&flags);

        assert_eq!(stats.total_flags, 3);
        assert_eq!(stats.flags_with_fraud, 2);
        assert_eq!(stats.flags_without_fraud, 1);
        let precision_error = (stats.precision() - 0.666).abs();
        assert!(precision_error < 0.01);
    }

    #[test]
    fn test_default_patterns_have_correct_properties() {
        let generator = RedFlagGenerator::new();

        // Strong flags should have high fraud probability
        for strong in generator.strong_flags.iter() {
            assert!(strong.fraud_probability >= 0.60);
            assert!(strong.inject_with_fraud > strong.inject_without_fraud);
        }

        // Weak flags should have low fraud probability
        for weak in generator.weak_flags.iter() {
            assert!(weak.fraud_probability < 0.30);
        }
    }

    #[test]
    fn test_add_custom_pattern() {
        let mut generator = RedFlagGenerator::new();
        let initial_strong = generator.strong_flags.len();

        let custom = RedFlagPattern::new(
            "custom_pattern",
            "Custom test pattern",
            RedFlagCategory::Account,
            RedFlagStrength::Strong,
            0.75,
            0.85,
            0.03,
        );
        generator.add_pattern(custom);

        assert_eq!(generator.strong_flags.len(), initial_strong + 1);
    }
}
693}