Skip to main content

datasynth_generators/anomaly/
document_flow_anomalies.rs

//! Document flow anomaly injection for 3-way match fraud patterns.
//!
//! This module provides anomaly injection specifically for document flows,
//! simulating common procurement fraud and control-bypass patterns:
//! - Quantity and price mismatches between PO, GR, and Invoice
//! - Maverick buying (Invoice without PO)
//! - Unbilled goods (GR without Invoice)
//! - Unauthorized disbursements (Payment without Invoice)
//! - Timing issues (Invoice dated before the goods receipt)
9
10use chrono::NaiveDate;
11use rand::prelude::*;
12use rand_chacha::ChaCha8Rng;
13use rust_decimal::Decimal;
14use serde::{Deserialize, Serialize};
15
16use datasynth_core::models::documents::{GoodsReceipt, Payment, PurchaseOrder, VendorInvoice};
17use datasynth_core::{AnomalyType, FraudType, LabeledAnomaly, ProcessIssueType};
18
19/// Types of document flow anomalies.
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
21pub enum DocumentFlowAnomalyType {
22    /// GR quantity doesn't match PO quantity
23    QuantityMismatch,
24    /// Price on invoice doesn't match PO price
25    PriceMismatch,
26    /// Invoice received without corresponding PO (maverick buying)
27    InvoiceWithoutPO,
28    /// Goods received but never invoiced
29    GoodsReceivedNotBilled,
30    /// Payment issued without valid invoice
31    PaymentWithoutInvoice,
32    /// Duplicate invoice for same PO
33    DuplicateInvoice,
34    /// Invoice date before goods receipt
35    InvoiceBeforeReceipt,
36    /// Payment before invoice approval
37    EarlyPayment,
38}
39
40/// Result of injecting a document flow anomaly.
41#[derive(Debug, Clone)]
42pub struct DocumentFlowAnomalyResult {
43    /// Type of anomaly injected
44    pub anomaly_type: DocumentFlowAnomalyType,
45    /// Description of what was modified
46    pub description: String,
47    /// Original value (if applicable)
48    pub original_value: Option<String>,
49    /// Modified value (if applicable)
50    pub modified_value: Option<String>,
51    /// Associated document IDs
52    pub document_ids: Vec<String>,
53    /// Severity (1-5)
54    pub severity: u8,
55}
56
57impl DocumentFlowAnomalyResult {
58    /// Convert to a labeled anomaly for ML training.
59    pub fn to_labeled_anomaly(
60        &self,
61        anomaly_id: &str,
62        document_id: &str,
63        company_code: &str,
64        date: NaiveDate,
65    ) -> LabeledAnomaly {
66        // Map document flow anomaly types to existing AnomalyType variants
67        let anomaly_type = match self.anomaly_type {
68            // Quantity/price mismatches are invoice manipulation fraud
69            DocumentFlowAnomalyType::QuantityMismatch => {
70                AnomalyType::Fraud(FraudType::InvoiceManipulation)
71            }
72            DocumentFlowAnomalyType::PriceMismatch => {
73                AnomalyType::Fraud(FraudType::InvoiceManipulation)
74            }
75            // Invoice without PO is a process issue (missing documentation/control bypass)
76            DocumentFlowAnomalyType::InvoiceWithoutPO => {
77                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
78            }
79            // Goods received but not billed could indicate asset misappropriation
80            DocumentFlowAnomalyType::GoodsReceivedNotBilled => {
81                AnomalyType::Fraud(FraudType::AssetMisappropriation)
82            }
83            // Payment without invoice is unauthorized approval
84            DocumentFlowAnomalyType::PaymentWithoutInvoice => {
85                AnomalyType::Fraud(FraudType::UnauthorizedApproval)
86            }
87            // Duplicate invoice is duplicate payment fraud
88            DocumentFlowAnomalyType::DuplicateInvoice => {
89                AnomalyType::Fraud(FraudType::DuplicatePayment)
90            }
91            // Invoice before receipt is process timing issue
92            DocumentFlowAnomalyType::InvoiceBeforeReceipt => {
93                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
94            }
95            // Early payment bypasses normal approval
96            DocumentFlowAnomalyType::EarlyPayment => {
97                AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval)
98            }
99        };
100
101        LabeledAnomaly::new(
102            anomaly_id.to_string(),
103            anomaly_type,
104            document_id.to_string(),
105            "DocumentFlow".to_string(),
106            company_code.to_string(),
107            date,
108        )
109        .with_description(&self.description)
110    }
111}
112
113/// Configuration for document flow anomaly injection.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct DocumentFlowAnomalyConfig {
116    /// Probability of quantity mismatch (0.0-1.0)
117    pub quantity_mismatch_rate: f64,
118    /// Probability of price mismatch
119    pub price_mismatch_rate: f64,
120    /// Probability of invoice without PO
121    pub maverick_buying_rate: f64,
122    /// Probability of GR without invoice
123    pub unbilled_receipt_rate: f64,
124    /// Probability of payment without invoice
125    pub unauthorized_payment_rate: f64,
126    /// Probability of duplicate invoice
127    pub duplicate_invoice_rate: f64,
128    /// Probability of invoice before receipt
129    pub early_invoice_rate: f64,
130    /// Probability of early payment
131    pub early_payment_rate: f64,
132    /// Maximum quantity variance percentage (e.g., 0.2 = 20%)
133    pub max_quantity_variance: f64,
134    /// Maximum price variance percentage
135    pub max_price_variance: f64,
136}
137
138impl Default for DocumentFlowAnomalyConfig {
139    fn default() -> Self {
140        Self {
141            quantity_mismatch_rate: 0.02,     // 2% of receipts
142            price_mismatch_rate: 0.015,       // 1.5% of invoices
143            maverick_buying_rate: 0.01,       // 1% maverick buying
144            unbilled_receipt_rate: 0.005,     // 0.5% unbilled
145            unauthorized_payment_rate: 0.002, // 0.2% unauthorized
146            duplicate_invoice_rate: 0.008,    // 0.8% duplicates
147            early_invoice_rate: 0.01,         // 1% early invoices
148            early_payment_rate: 0.005,        // 0.5% early payments
149            max_quantity_variance: 0.25,      // Up to 25% variance
150            max_price_variance: 0.15,         // Up to 15% variance
151        }
152    }
153}
154
155/// Injector for document flow anomalies.
156pub struct DocumentFlowAnomalyInjector {
157    config: DocumentFlowAnomalyConfig,
158    rng: ChaCha8Rng,
159    results: Vec<DocumentFlowAnomalyResult>,
160}
161
162impl DocumentFlowAnomalyInjector {
163    /// Create a new document flow anomaly injector.
164    pub fn new(config: DocumentFlowAnomalyConfig, seed: u64) -> Self {
165        Self {
166            config,
167            rng: ChaCha8Rng::seed_from_u64(seed),
168            results: Vec::new(),
169        }
170    }
171
172    /// Create with default configuration.
173    pub fn with_seed(seed: u64) -> Self {
174        Self::new(DocumentFlowAnomalyConfig::default(), seed)
175    }
176
177    /// Get the results of anomaly injection.
178    pub fn get_results(&self) -> &[DocumentFlowAnomalyResult] {
179        &self.results
180    }
181
182    /// Clear results.
183    pub fn clear_results(&mut self) {
184        self.results.clear();
185    }
186
187    /// Maybe inject a quantity mismatch into a goods receipt.
188    ///
189    /// Returns true if an anomaly was injected.
190    pub fn maybe_inject_quantity_mismatch(
191        &mut self,
192        gr: &mut GoodsReceipt,
193        po: &PurchaseOrder,
194    ) -> bool {
195        if self.rng.gen::<f64>() >= self.config.quantity_mismatch_rate {
196            return false;
197        }
198
199        // Find a matching item to modify
200        if let Some(gr_item) = gr.items.first_mut() {
201            let original_qty = gr_item.base.quantity;
202
203            // Generate variance (either over or under)
204            let variance = if self.rng.gen::<bool>() {
205                // Over-receipt (more common in fraud)
206                Decimal::from_f64_retain(
207                    1.0 + self.rng.gen::<f64>() * self.config.max_quantity_variance,
208                )
209                .unwrap_or(Decimal::ONE)
210            } else {
211                // Under-receipt
212                Decimal::from_f64_retain(
213                    1.0 - self.rng.gen::<f64>() * self.config.max_quantity_variance,
214                )
215                .unwrap_or(Decimal::ONE)
216            };
217
218            gr_item.base.quantity = (original_qty * variance).round_dp(2);
219
220            let result = DocumentFlowAnomalyResult {
221                anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
222                description: format!(
223                    "GR quantity {} doesn't match PO, expected based on PO line",
224                    gr_item.base.quantity
225                ),
226                original_value: Some(original_qty.to_string()),
227                modified_value: Some(gr_item.base.quantity.to_string()),
228                document_ids: vec![gr.header.document_id.clone(), po.header.document_id.clone()],
229                severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
230                {
231                    4
232                } else {
233                    3
234                },
235            };
236
237            self.results.push(result);
238            true
239        } else {
240            false
241        }
242    }
243
244    /// Maybe inject a price mismatch into a vendor invoice.
245    ///
246    /// Returns true if an anomaly was injected.
247    pub fn maybe_inject_price_mismatch(
248        &mut self,
249        invoice: &mut VendorInvoice,
250        po: &PurchaseOrder,
251    ) -> bool {
252        if self.rng.gen::<f64>() >= self.config.price_mismatch_rate {
253            return false;
254        }
255
256        // Find a matching item to modify
257        if let Some(inv_item) = invoice.items.first_mut() {
258            let original_price = inv_item.base.unit_price;
259
260            // Usually invoices are higher than PO (vendor overcharging)
261            let variance = if self.rng.gen::<f64>() < 0.8 {
262                // 80% chance of overcharge
263                Decimal::from_f64_retain(
264                    1.0 + self.rng.gen::<f64>() * self.config.max_price_variance,
265                )
266                .unwrap_or(Decimal::ONE)
267            } else {
268                // 20% chance of undercharge (rare, could be error)
269                Decimal::from_f64_retain(
270                    1.0 - self.rng.gen::<f64>() * self.config.max_price_variance * 0.5,
271                )
272                .unwrap_or(Decimal::ONE)
273            };
274
275            inv_item.base.unit_price = (original_price * variance).round_dp(2);
276
277            let result = DocumentFlowAnomalyResult {
278                anomaly_type: DocumentFlowAnomalyType::PriceMismatch,
279                description: format!(
280                    "Invoice price {} doesn't match PO agreed price",
281                    inv_item.base.unit_price
282                ),
283                original_value: Some(original_price.to_string()),
284                modified_value: Some(inv_item.base.unit_price.to_string()),
285                document_ids: vec![
286                    invoice.header.document_id.clone(),
287                    po.header.document_id.clone(),
288                ],
289                severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
290                {
291                    4
292                } else {
293                    3
294                },
295            };
296
297            self.results.push(result);
298            true
299        } else {
300            false
301        }
302    }
303
304    /// Create an invoice without PO reference (maverick buying).
305    ///
306    /// Removes the PO reference from an invoice to simulate maverick buying.
307    pub fn inject_maverick_buying(&mut self, invoice: &mut VendorInvoice) -> bool {
308        if self.rng.gen::<f64>() >= self.config.maverick_buying_rate {
309            return false;
310        }
311
312        // Only inject if there's a PO to remove
313        if invoice.purchase_order_id.is_none() {
314            return false;
315        }
316
317        let original_po = invoice.purchase_order_id.take();
318
319        let result = DocumentFlowAnomalyResult {
320            anomaly_type: DocumentFlowAnomalyType::InvoiceWithoutPO,
321            description: "Invoice submitted without purchase order (maverick buying)".to_string(),
322            original_value: original_po,
323            modified_value: None,
324            document_ids: vec![invoice.header.document_id.clone()],
325            severity: 4, // Significant control bypass
326        };
327
328        self.results.push(result);
329        true
330    }
331
332    /// Mark a goods receipt as having invoice timing anomaly.
333    ///
334    /// Returns a result indicating invoice came before goods receipt.
335    pub fn create_early_invoice_anomaly(
336        &mut self,
337        invoice: &VendorInvoice,
338        gr: &GoodsReceipt,
339    ) -> Option<DocumentFlowAnomalyResult> {
340        if self.rng.gen::<f64>() >= self.config.early_invoice_rate {
341            return None;
342        }
343
344        // Check if invoice date is before GR date
345        if invoice.invoice_date < gr.header.document_date {
346            let result = DocumentFlowAnomalyResult {
347                anomaly_type: DocumentFlowAnomalyType::InvoiceBeforeReceipt,
348                description: format!(
349                    "Invoice dated {} before goods receipt dated {}",
350                    invoice.invoice_date, gr.header.document_date
351                ),
352                original_value: Some(gr.header.document_date.to_string()),
353                modified_value: Some(invoice.invoice_date.to_string()),
354                document_ids: vec![
355                    invoice.header.document_id.clone(),
356                    gr.header.document_id.clone(),
357                ],
358                severity: 3,
359            };
360
361            self.results.push(result.clone());
362            return Some(result);
363        }
364
365        None
366    }
367
368    /// Check for potential unauthorized payment (payment without proper invoice).
369    pub fn check_unauthorized_payment(
370        &mut self,
371        payment: &Payment,
372        has_valid_invoice: bool,
373    ) -> Option<DocumentFlowAnomalyResult> {
374        if has_valid_invoice {
375            return None;
376        }
377
378        if self.rng.gen::<f64>() >= self.config.unauthorized_payment_rate {
379            return None;
380        }
381
382        let result = DocumentFlowAnomalyResult {
383            anomaly_type: DocumentFlowAnomalyType::PaymentWithoutInvoice,
384            description: "Payment issued without valid approved invoice".to_string(),
385            original_value: None,
386            modified_value: None,
387            document_ids: vec![payment.header.document_id.clone()],
388            severity: 5, // Critical - potential fraud
389        };
390
391        self.results.push(result.clone());
392        Some(result)
393    }
394
395    /// Get statistics about injected anomalies.
396    pub fn get_statistics(&self) -> DocumentFlowAnomalyStats {
397        let mut stats = DocumentFlowAnomalyStats::default();
398
399        for result in &self.results {
400            match result.anomaly_type {
401                DocumentFlowAnomalyType::QuantityMismatch => stats.quantity_mismatches += 1,
402                DocumentFlowAnomalyType::PriceMismatch => stats.price_mismatches += 1,
403                DocumentFlowAnomalyType::InvoiceWithoutPO => stats.maverick_buying += 1,
404                DocumentFlowAnomalyType::GoodsReceivedNotBilled => stats.unbilled_receipts += 1,
405                DocumentFlowAnomalyType::PaymentWithoutInvoice => stats.unauthorized_payments += 1,
406                DocumentFlowAnomalyType::DuplicateInvoice => stats.duplicate_invoices += 1,
407                DocumentFlowAnomalyType::InvoiceBeforeReceipt => stats.early_invoices += 1,
408                DocumentFlowAnomalyType::EarlyPayment => stats.early_payments += 1,
409            }
410        }
411
412        stats.total = self.results.len();
413        stats
414    }
415}
416
/// Per-type counts of recorded document flow anomalies.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowAnomalyStats {
    /// Number of recorded anomalies across all types.
    pub total: usize,
    /// Count of `QuantityMismatch` anomalies.
    pub quantity_mismatches: usize,
    /// Count of `PriceMismatch` anomalies.
    pub price_mismatches: usize,
    /// Count of `InvoiceWithoutPO` anomalies.
    pub maverick_buying: usize,
    /// Count of `GoodsReceivedNotBilled` anomalies.
    pub unbilled_receipts: usize,
    /// Count of `PaymentWithoutInvoice` anomalies.
    pub unauthorized_payments: usize,
    /// Count of `DuplicateInvoice` anomalies.
    pub duplicate_invoices: usize,
    /// Count of `InvoiceBeforeReceipt` anomalies.
    pub early_invoices: usize,
    /// Count of `EarlyPayment` anomalies.
    pub early_payments: usize,
}
430
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::documents::{
        GoodsReceiptItem, PurchaseOrderItem, VendorInvoiceItem,
    };
    use rust_decimal_macros::dec;

    /// Purchase order with a single line: quantity 100 at unit price 10.00.
    fn create_test_po() -> PurchaseOrder {
        let mut po = PurchaseOrder::new(
            "PO-001",
            "1000",
            "VEND001",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
            "USER001",
        );
        po.add_item(PurchaseOrderItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        po
    }

    /// Goods receipt with a single line matching the test PO line.
    fn create_test_gr(_po_id: &str) -> GoodsReceipt {
        let mut gr = GoodsReceipt::new(
            "GR-001",
            "1000",
            "PLANT01",
            "STOR01",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 20).unwrap(),
            "USER001",
        );
        gr.add_item(GoodsReceiptItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        gr
    }

    /// Vendor invoice with a single line and an optional PO reference.
    fn create_test_invoice(po_id: Option<&str>) -> VendorInvoice {
        let mut inv = VendorInvoice::new(
            "VI-001",
            "1000",
            "VEND001",
            "INV-001",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 25).unwrap(),
            "USER001",
        );
        inv.purchase_order_id = po_id.map(|s| s.to_string());
        inv.add_item(VendorInvoiceItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        inv
    }

    #[test]
    fn test_quantity_mismatch_injection() {
        // Use high rate to ensure injection
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0, // Always inject
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);

        let original_qty = gr.items[0].base.quantity;
        let injected = injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        assert!(injected);
        assert_ne!(gr.items[0].base.quantity, original_qty);
        assert_eq!(injector.get_results().len(), 1);
        assert_eq!(
            injector.get_results()[0].anomaly_type,
            DocumentFlowAnomalyType::QuantityMismatch
        );
    }

    #[test]
    fn test_quantity_mismatch_zero_rate_never_injects() {
        // A rate of 0.0 can never pass the probability gate.
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 0.0,
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        let original_qty = gr.items[0].base.quantity;

        for _ in 0..100 {
            assert!(!injector.maybe_inject_quantity_mismatch(&mut gr, &po));
        }

        assert_eq!(gr.items[0].base.quantity, original_qty);
        assert!(injector.get_results().is_empty());
    }

    #[test]
    fn test_maverick_buying_injection() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0, // Always inject
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(Some("PO-001"));

        assert!(invoice.purchase_order_id.is_some());
        let injected = injector.inject_maverick_buying(&mut invoice);

        assert!(injected);
        assert!(invoice.purchase_order_id.is_none());
        assert_eq!(
            injector.get_results()[0].anomaly_type,
            DocumentFlowAnomalyType::InvoiceWithoutPO
        );
    }

    #[test]
    fn test_maverick_buying_skips_invoice_without_po() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0, // Always pass the probability gate
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(None);

        // There is no PO reference to remove, so nothing can be injected.
        let injected = injector.inject_maverick_buying(&mut invoice);

        assert!(!injected);
        assert!(invoice.purchase_order_id.is_none());
        assert!(injector.get_results().is_empty());
    }

    #[test]
    fn test_clear_results() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0,
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(Some("PO-001"));
        assert!(injector.inject_maverick_buying(&mut invoice));
        assert_eq!(injector.get_results().len(), 1);

        injector.clear_results();

        assert!(injector.get_results().is_empty());
        assert_eq!(injector.get_statistics().total, 0);
    }

    #[test]
    fn test_statistics() {
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0,
            maverick_buying_rate: 1.0,
            ..Default::default()
        };

        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);

        // Inject quantity mismatch
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        // Inject maverick buying
        let mut invoice = create_test_invoice(Some("PO-001"));
        injector.inject_maverick_buying(&mut invoice);

        let stats = injector.get_statistics();
        assert_eq!(stats.total, 2);
        assert_eq!(stats.quantity_mismatches, 1);
        assert_eq!(stats.maverick_buying, 1);
    }

    #[test]
    fn test_labeled_anomaly_conversion() {
        let result = DocumentFlowAnomalyResult {
            anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
            description: "Test mismatch".to_string(),
            original_value: Some("100".to_string()),
            modified_value: Some("120".to_string()),
            document_ids: vec!["DOC-001".to_string()],
            severity: 3,
        };

        let labeled = result.to_labeled_anomaly(
            "ANO-001",
            "DOC-001",
            "1000",
            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
        );

        assert_eq!(labeled.document_id, "DOC-001");
        assert_eq!(labeled.company_code, "1000");
        // QuantityMismatch maps to InvoiceManipulation
        assert!(matches!(
            labeled.anomaly_type,
            AnomalyType::Fraud(FraudType::InvoiceManipulation)
        ));
    }
}