Skip to main content

datasynth_generators/anomaly/
document_flow_anomalies.rs

1//! Document flow anomaly injection for 3-way match fraud patterns.
2//!
3//! This module provides anomaly injection specifically for document flows,
4//! simulating common procurement fraud patterns:
5//! - Quantity mismatches between PO, GR, and Invoice
6//! - Maverick buying (Invoice without PO)
7//! - Unbilled goods (GR without Invoice)
8//! - Unauthorized disbursements (Payment without Invoice)
9
10use chrono::NaiveDate;
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13use rust_decimal::Decimal;
14use serde::{Deserialize, Serialize};
15
16use datasynth_core::models::documents::{GoodsReceipt, Payment, PurchaseOrder, VendorInvoice};
17use datasynth_core::{AnomalyType, FraudType, LabeledAnomaly, ProcessIssueType};
18
19/// Types of document flow anomalies.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
21pub enum DocumentFlowAnomalyType {
22    /// GR quantity doesn't match PO quantity
23    QuantityMismatch,
24    /// Price on invoice doesn't match PO price
25    PriceMismatch,
26    /// Invoice received without corresponding PO (maverick buying)
27    InvoiceWithoutPO,
28    /// Goods received but never invoiced
29    GoodsReceivedNotBilled,
30    /// Payment issued without valid invoice
31    PaymentWithoutInvoice,
32    /// Duplicate invoice for same PO
33    DuplicateInvoice,
34    /// Invoice date before goods receipt
35    InvoiceBeforeReceipt,
36    /// Payment before invoice approval
37    EarlyPayment,
38}
39
40/// Result of injecting a document flow anomaly.
41#[derive(Debug, Clone)]
42pub struct DocumentFlowAnomalyResult {
43    /// Type of anomaly injected
44    pub anomaly_type: DocumentFlowAnomalyType,
45    /// Description of what was modified
46    pub description: String,
47    /// Original value (if applicable)
48    pub original_value: Option<String>,
49    /// Modified value (if applicable)
50    pub modified_value: Option<String>,
51    /// Associated document IDs
52    pub document_ids: Vec<String>,
53    /// Severity (1-5)
54    pub severity: u8,
55}
56
57impl DocumentFlowAnomalyResult {
58    /// Convert to a labeled anomaly for ML training.
59    pub fn to_labeled_anomaly(
60        &self,
61        anomaly_id: &str,
62        document_id: &str,
63        company_code: &str,
64        date: NaiveDate,
65    ) -> LabeledAnomaly {
66        // Map document flow anomaly types to existing AnomalyType variants
67        let anomaly_type = match self.anomaly_type {
68            // Quantity/price mismatches are invoice manipulation fraud
69            DocumentFlowAnomalyType::QuantityMismatch => {
70                AnomalyType::Fraud(FraudType::InvoiceManipulation)
71            }
72            DocumentFlowAnomalyType::PriceMismatch => {
73                AnomalyType::Fraud(FraudType::InvoiceManipulation)
74            }
75            // Invoice without PO is a process issue (missing documentation/control bypass)
76            DocumentFlowAnomalyType::InvoiceWithoutPO => {
77                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
78            }
79            // Goods received but not billed could indicate asset misappropriation
80            DocumentFlowAnomalyType::GoodsReceivedNotBilled => {
81                AnomalyType::Fraud(FraudType::AssetMisappropriation)
82            }
83            // Payment without invoice is unauthorized approval
84            DocumentFlowAnomalyType::PaymentWithoutInvoice => {
85                AnomalyType::Fraud(FraudType::UnauthorizedApproval)
86            }
87            // Duplicate invoice is duplicate payment fraud
88            DocumentFlowAnomalyType::DuplicateInvoice => {
89                AnomalyType::Fraud(FraudType::DuplicatePayment)
90            }
91            // Invoice before receipt is process timing issue
92            DocumentFlowAnomalyType::InvoiceBeforeReceipt => {
93                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
94            }
95            // Early payment bypasses normal approval
96            DocumentFlowAnomalyType::EarlyPayment => {
97                AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval)
98            }
99        };
100
101        LabeledAnomaly::new(
102            anomaly_id.to_string(),
103            anomaly_type,
104            document_id.to_string(),
105            "DocumentFlow".to_string(),
106            company_code.to_string(),
107            date,
108        )
109        .with_description(&self.description)
110    }
111}
112
113/// Configuration for document flow anomaly injection.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct DocumentFlowAnomalyConfig {
116    /// Probability of quantity mismatch (0.0-1.0)
117    pub quantity_mismatch_rate: f64,
118    /// Probability of price mismatch
119    pub price_mismatch_rate: f64,
120    /// Probability of invoice without PO
121    pub maverick_buying_rate: f64,
122    /// Probability of GR without invoice
123    pub unbilled_receipt_rate: f64,
124    /// Probability of payment without invoice
125    pub unauthorized_payment_rate: f64,
126    /// Probability of duplicate invoice
127    pub duplicate_invoice_rate: f64,
128    /// Probability of invoice before receipt
129    pub early_invoice_rate: f64,
130    /// Probability of early payment
131    pub early_payment_rate: f64,
132    /// Maximum quantity variance percentage (e.g., 0.2 = 20%)
133    pub max_quantity_variance: f64,
134    /// Maximum price variance percentage
135    pub max_price_variance: f64,
136}
137
138impl Default for DocumentFlowAnomalyConfig {
139    fn default() -> Self {
140        Self {
141            quantity_mismatch_rate: 0.02,     // 2% of receipts
142            price_mismatch_rate: 0.015,       // 1.5% of invoices
143            maverick_buying_rate: 0.01,       // 1% maverick buying
144            unbilled_receipt_rate: 0.005,     // 0.5% unbilled
145            unauthorized_payment_rate: 0.002, // 0.2% unauthorized
146            duplicate_invoice_rate: 0.008,    // 0.8% duplicates
147            early_invoice_rate: 0.01,         // 1% early invoices
148            early_payment_rate: 0.005,        // 0.5% early payments
149            max_quantity_variance: 0.25,      // Up to 25% variance
150            max_price_variance: 0.15,         // Up to 15% variance
151        }
152    }
153}
154
155/// Injector for document flow anomalies.
156pub struct DocumentFlowAnomalyInjector {
157    config: DocumentFlowAnomalyConfig,
158    rng: ChaCha8Rng,
159    results: Vec<DocumentFlowAnomalyResult>,
160}
161
162impl DocumentFlowAnomalyInjector {
163    /// Create a new document flow anomaly injector.
164    pub fn new(config: DocumentFlowAnomalyConfig, seed: u64) -> Self {
165        Self {
166            config,
167            rng: ChaCha8Rng::seed_from_u64(seed),
168            results: Vec::new(),
169        }
170    }
171
172    /// Create with default configuration.
173    pub fn with_seed(seed: u64) -> Self {
174        Self::new(DocumentFlowAnomalyConfig::default(), seed)
175    }
176
177    /// Get the results of anomaly injection.
178    pub fn get_results(&self) -> &[DocumentFlowAnomalyResult] {
179        &self.results
180    }
181
182    /// Clear results.
183    pub fn clear_results(&mut self) {
184        self.results.clear();
185    }
186
187    /// Maybe inject a quantity mismatch into a goods receipt.
188    ///
189    /// Returns true if an anomaly was injected.
190    pub fn maybe_inject_quantity_mismatch(
191        &mut self,
192        gr: &mut GoodsReceipt,
193        po: &PurchaseOrder,
194    ) -> bool {
195        if self.rng.gen::<f64>() >= self.config.quantity_mismatch_rate {
196            return false;
197        }
198
199        // Find a matching item to modify
200        if let Some(gr_item) = gr.items.first_mut() {
201            let original_qty = gr_item.base.quantity;
202
203            // Generate variance (either over or under)
204            let variance = if self.rng.gen::<bool>() {
205                // Over-receipt (more common in fraud)
206                Decimal::from_f64_retain(
207                    1.0 + self.rng.gen::<f64>() * self.config.max_quantity_variance,
208                )
209                .unwrap_or(Decimal::ONE)
210            } else {
211                // Under-receipt
212                Decimal::from_f64_retain(
213                    1.0 - self.rng.gen::<f64>() * self.config.max_quantity_variance,
214                )
215                .unwrap_or(Decimal::ONE)
216            };
217
218            gr_item.base.quantity = (original_qty * variance).round_dp(2);
219
220            let result = DocumentFlowAnomalyResult {
221                anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
222                description: format!(
223                    "GR quantity {} doesn't match PO, expected based on PO line",
224                    gr_item.base.quantity
225                ),
226                original_value: Some(original_qty.to_string()),
227                modified_value: Some(gr_item.base.quantity.to_string()),
228                document_ids: vec![gr.header.document_id.clone(), po.header.document_id.clone()],
229                severity: if variance > Decimal::from_f64_retain(1.1).unwrap() {
230                    4
231                } else {
232                    3
233                },
234            };
235
236            self.results.push(result);
237            true
238        } else {
239            false
240        }
241    }
242
243    /// Maybe inject a price mismatch into a vendor invoice.
244    ///
245    /// Returns true if an anomaly was injected.
246    pub fn maybe_inject_price_mismatch(
247        &mut self,
248        invoice: &mut VendorInvoice,
249        po: &PurchaseOrder,
250    ) -> bool {
251        if self.rng.gen::<f64>() >= self.config.price_mismatch_rate {
252            return false;
253        }
254
255        // Find a matching item to modify
256        if let Some(inv_item) = invoice.items.first_mut() {
257            let original_price = inv_item.base.unit_price;
258
259            // Usually invoices are higher than PO (vendor overcharging)
260            let variance = if self.rng.gen::<f64>() < 0.8 {
261                // 80% chance of overcharge
262                Decimal::from_f64_retain(
263                    1.0 + self.rng.gen::<f64>() * self.config.max_price_variance,
264                )
265                .unwrap_or(Decimal::ONE)
266            } else {
267                // 20% chance of undercharge (rare, could be error)
268                Decimal::from_f64_retain(
269                    1.0 - self.rng.gen::<f64>() * self.config.max_price_variance * 0.5,
270                )
271                .unwrap_or(Decimal::ONE)
272            };
273
274            inv_item.base.unit_price = (original_price * variance).round_dp(2);
275
276            let result = DocumentFlowAnomalyResult {
277                anomaly_type: DocumentFlowAnomalyType::PriceMismatch,
278                description: format!(
279                    "Invoice price {} doesn't match PO agreed price",
280                    inv_item.base.unit_price
281                ),
282                original_value: Some(original_price.to_string()),
283                modified_value: Some(inv_item.base.unit_price.to_string()),
284                document_ids: vec![
285                    invoice.header.document_id.clone(),
286                    po.header.document_id.clone(),
287                ],
288                severity: if variance > Decimal::from_f64_retain(1.1).unwrap() {
289                    4
290                } else {
291                    3
292                },
293            };
294
295            self.results.push(result);
296            true
297        } else {
298            false
299        }
300    }
301
302    /// Create an invoice without PO reference (maverick buying).
303    ///
304    /// Removes the PO reference from an invoice to simulate maverick buying.
305    pub fn inject_maverick_buying(&mut self, invoice: &mut VendorInvoice) -> bool {
306        if self.rng.gen::<f64>() >= self.config.maverick_buying_rate {
307            return false;
308        }
309
310        // Only inject if there's a PO to remove
311        if invoice.purchase_order_id.is_none() {
312            return false;
313        }
314
315        let original_po = invoice.purchase_order_id.take();
316
317        let result = DocumentFlowAnomalyResult {
318            anomaly_type: DocumentFlowAnomalyType::InvoiceWithoutPO,
319            description: "Invoice submitted without purchase order (maverick buying)".to_string(),
320            original_value: original_po,
321            modified_value: None,
322            document_ids: vec![invoice.header.document_id.clone()],
323            severity: 4, // Significant control bypass
324        };
325
326        self.results.push(result);
327        true
328    }
329
330    /// Mark a goods receipt as having invoice timing anomaly.
331    ///
332    /// Returns a result indicating invoice came before goods receipt.
333    pub fn create_early_invoice_anomaly(
334        &mut self,
335        invoice: &VendorInvoice,
336        gr: &GoodsReceipt,
337    ) -> Option<DocumentFlowAnomalyResult> {
338        if self.rng.gen::<f64>() >= self.config.early_invoice_rate {
339            return None;
340        }
341
342        // Check if invoice date is before GR date
343        if invoice.invoice_date < gr.header.document_date {
344            let result = DocumentFlowAnomalyResult {
345                anomaly_type: DocumentFlowAnomalyType::InvoiceBeforeReceipt,
346                description: format!(
347                    "Invoice dated {} before goods receipt dated {}",
348                    invoice.invoice_date, gr.header.document_date
349                ),
350                original_value: Some(gr.header.document_date.to_string()),
351                modified_value: Some(invoice.invoice_date.to_string()),
352                document_ids: vec![
353                    invoice.header.document_id.clone(),
354                    gr.header.document_id.clone(),
355                ],
356                severity: 3,
357            };
358
359            self.results.push(result.clone());
360            return Some(result);
361        }
362
363        None
364    }
365
366    /// Check for potential unauthorized payment (payment without proper invoice).
367    pub fn check_unauthorized_payment(
368        &mut self,
369        payment: &Payment,
370        has_valid_invoice: bool,
371    ) -> Option<DocumentFlowAnomalyResult> {
372        if has_valid_invoice {
373            return None;
374        }
375
376        if self.rng.gen::<f64>() >= self.config.unauthorized_payment_rate {
377            return None;
378        }
379
380        let result = DocumentFlowAnomalyResult {
381            anomaly_type: DocumentFlowAnomalyType::PaymentWithoutInvoice,
382            description: "Payment issued without valid approved invoice".to_string(),
383            original_value: None,
384            modified_value: None,
385            document_ids: vec![payment.header.document_id.clone()],
386            severity: 5, // Critical - potential fraud
387        };
388
389        self.results.push(result.clone());
390        Some(result)
391    }
392
393    /// Get statistics about injected anomalies.
394    pub fn get_statistics(&self) -> DocumentFlowAnomalyStats {
395        let mut stats = DocumentFlowAnomalyStats::default();
396
397        for result in &self.results {
398            match result.anomaly_type {
399                DocumentFlowAnomalyType::QuantityMismatch => stats.quantity_mismatches += 1,
400                DocumentFlowAnomalyType::PriceMismatch => stats.price_mismatches += 1,
401                DocumentFlowAnomalyType::InvoiceWithoutPO => stats.maverick_buying += 1,
402                DocumentFlowAnomalyType::GoodsReceivedNotBilled => stats.unbilled_receipts += 1,
403                DocumentFlowAnomalyType::PaymentWithoutInvoice => stats.unauthorized_payments += 1,
404                DocumentFlowAnomalyType::DuplicateInvoice => stats.duplicate_invoices += 1,
405                DocumentFlowAnomalyType::InvoiceBeforeReceipt => stats.early_invoices += 1,
406                DocumentFlowAnomalyType::EarlyPayment => stats.early_payments += 1,
407            }
408        }
409
410        stats.total = self.results.len();
411        stats
412    }
413}
414
/// Statistics about document flow anomalies: one counter per anomaly kind plus a total.
///
/// `Default` yields all counters at zero. `PartialEq`/`Eq` allow two snapshots to be
/// compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DocumentFlowAnomalyStats {
    /// Number of anomalies of any kind.
    pub total: usize,
    /// Number of quantity mismatches.
    pub quantity_mismatches: usize,
    /// Number of price mismatches.
    pub price_mismatches: usize,
    /// Number of invoices without a purchase order.
    pub maverick_buying: usize,
    /// Number of goods receipts that were never billed.
    pub unbilled_receipts: usize,
    /// Number of payments without an invoice.
    pub unauthorized_payments: usize,
    /// Number of duplicate invoices.
    pub duplicate_invoices: usize,
    /// Number of invoices dated before the goods receipt.
    pub early_invoices: usize,
    /// Number of early payments.
    pub early_payments: usize,
}
428
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::documents::{
        GoodsReceiptItem, PurchaseOrderItem, VendorInvoiceItem,
    };
    use rust_decimal_macros::dec;

    /// Fixture: purchase order `PO-001` carrying a single `Test Item` line.
    fn create_test_po() -> PurchaseOrder {
        let order_date = NaiveDate::from_ymd_opt(2024, 1, 15).unwrap();
        let mut po = PurchaseOrder::new(
            "PO-001", "1000", "VEND001", 2024, 1, order_date, "USER001",
        );
        let line = PurchaseOrderItem::new(1, "Test Item", dec!(100), dec!(10.00));
        po.add_item(line);
        po
    }

    /// Fixture: goods receipt `GR-001` carrying a single `Test Item` line.
    ///
    /// The PO id argument is accepted but not used.
    fn create_test_gr(_po_id: &str) -> GoodsReceipt {
        let receipt_date = NaiveDate::from_ymd_opt(2024, 1, 20).unwrap();
        let mut gr = GoodsReceipt::new(
            "GR-001", "1000", "PLANT01", "STOR01", 2024, 1, receipt_date, "USER001",
        );
        let line = GoodsReceiptItem::new(1, "Test Item", dec!(100), dec!(10.00));
        gr.add_item(line);
        gr
    }

    /// Fixture: vendor invoice `VI-001` with one `Test Item` line and the given PO reference.
    fn create_test_invoice(po_id: Option<&str>) -> VendorInvoice {
        let invoice_date = NaiveDate::from_ymd_opt(2024, 1, 25).unwrap();
        let mut inv = VendorInvoice::new(
            "VI-001", "1000", "VEND001", "INV-001", 2024, 1, invoice_date, "USER001",
        );
        inv.purchase_order_id = po_id.map(|s| s.to_string());
        let line = VendorInvoiceItem::new(1, "Test Item", dec!(100), dec!(10.00));
        inv.add_item(line);
        inv
    }

    #[test]
    fn test_quantity_mismatch_injection() {
        // A rate of 1.0 means the probability gate always lets the injection through.
        let mut injector = DocumentFlowAnomalyInjector::new(
            DocumentFlowAnomalyConfig {
                quantity_mismatch_rate: 1.0,
                ..Default::default()
            },
            42,
        );
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        let qty_before = gr.items[0].base.quantity;

        let injected = injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        assert!(injected);
        assert_ne!(gr.items[0].base.quantity, qty_before);
        let recorded = injector.get_results();
        assert_eq!(recorded.len(), 1);
        assert_eq!(
            recorded[0].anomaly_type,
            DocumentFlowAnomalyType::QuantityMismatch
        );
    }

    #[test]
    fn test_maverick_buying_injection() {
        // A rate of 1.0 means the probability gate always lets the injection through.
        let mut injector = DocumentFlowAnomalyInjector::new(
            DocumentFlowAnomalyConfig {
                maverick_buying_rate: 1.0,
                ..Default::default()
            },
            42,
        );
        let mut invoice = create_test_invoice(Some("PO-001"));
        assert!(invoice.purchase_order_id.is_some());

        let injected = injector.inject_maverick_buying(&mut invoice);

        assert!(injected);
        assert!(invoice.purchase_order_id.is_none());
        let recorded = injector.get_results();
        assert_eq!(
            recorded[0].anomaly_type,
            DocumentFlowAnomalyType::InvoiceWithoutPO
        );
    }

    #[test]
    fn test_statistics() {
        let mut injector = DocumentFlowAnomalyInjector::new(
            DocumentFlowAnomalyConfig {
                quantity_mismatch_rate: 1.0,
                maverick_buying_rate: 1.0,
                ..Default::default()
            },
            42,
        );

        // First anomaly: quantity mismatch on a goods receipt.
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        // Second anomaly: maverick buying on an invoice.
        let mut invoice = create_test_invoice(Some("PO-001"));
        injector.inject_maverick_buying(&mut invoice);

        let stats = injector.get_statistics();
        assert_eq!(stats.total, 2);
        assert_eq!(stats.quantity_mismatches, 1);
        assert_eq!(stats.maverick_buying, 1);
    }

    #[test]
    fn test_labeled_anomaly_conversion() {
        let result = DocumentFlowAnomalyResult {
            anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
            description: "Test mismatch".to_string(),
            original_value: Some("100".to_string()),
            modified_value: Some("120".to_string()),
            document_ids: vec!["DOC-001".to_string()],
            severity: 3,
        };
        let label_date = NaiveDate::from_ymd_opt(2024, 1, 15).unwrap();

        let labeled = result.to_labeled_anomaly("ANO-001", "DOC-001", "1000", label_date);

        assert_eq!(labeled.document_id, "DOC-001");
        assert_eq!(labeled.company_code, "1000");
        // A quantity mismatch is expected to be labeled as invoice manipulation.
        assert!(matches!(
            labeled.anomaly_type,
            AnomalyType::Fraud(FraudType::InvoiceManipulation)
        ));
    }
}