Skip to main content

datasynth_generators/anomaly/
document_flow_anomalies.rs

1//! Document flow anomaly injection for 3-way match fraud patterns.
2//!
3//! This module provides anomaly injection specifically for document flows,
4//! simulating common procurement fraud patterns:
5//! - Quantity mismatches between PO, GR, and Invoice
6//! - Maverick buying (Invoice without PO)
7//! - Unbilled goods (GR without Invoice)
8//! - Unauthorized disbursements (Payment without Invoice)
9
10use chrono::NaiveDate;
11use datasynth_core::utils::seeded_rng;
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use rust_decimal::Decimal;
15use serde::{Deserialize, Serialize};
16
17use datasynth_core::models::documents::{GoodsReceipt, Payment, PurchaseOrder, VendorInvoice};
18use datasynth_core::{AnomalyType, FraudType, LabeledAnomaly, ProcessIssueType};
19
20/// Types of document flow anomalies.
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
22pub enum DocumentFlowAnomalyType {
23    /// GR quantity doesn't match PO quantity
24    QuantityMismatch,
25    /// Price on invoice doesn't match PO price
26    PriceMismatch,
27    /// Invoice received without corresponding PO (maverick buying)
28    InvoiceWithoutPO,
29    /// Goods received but never invoiced
30    GoodsReceivedNotBilled,
31    /// Payment issued without valid invoice
32    PaymentWithoutInvoice,
33    /// Duplicate invoice for same PO
34    DuplicateInvoice,
35    /// Invoice date before goods receipt
36    InvoiceBeforeReceipt,
37    /// Payment before invoice approval
38    EarlyPayment,
39}
40
41/// Result of injecting a document flow anomaly.
42#[derive(Debug, Clone)]
43pub struct DocumentFlowAnomalyResult {
44    /// Type of anomaly injected
45    pub anomaly_type: DocumentFlowAnomalyType,
46    /// Description of what was modified
47    pub description: String,
48    /// Original value (if applicable)
49    pub original_value: Option<String>,
50    /// Modified value (if applicable)
51    pub modified_value: Option<String>,
52    /// Associated document IDs
53    pub document_ids: Vec<String>,
54    /// Severity (1-5)
55    pub severity: u8,
56}
57
58impl DocumentFlowAnomalyResult {
59    /// Convert to a labeled anomaly for ML training.
60    pub fn to_labeled_anomaly(
61        &self,
62        anomaly_id: &str,
63        document_id: &str,
64        company_code: &str,
65        date: NaiveDate,
66    ) -> LabeledAnomaly {
67        // Map document flow anomaly types to existing AnomalyType variants
68        let anomaly_type = match self.anomaly_type {
69            // Quantity/price mismatches are invoice manipulation fraud
70            DocumentFlowAnomalyType::QuantityMismatch => {
71                AnomalyType::Fraud(FraudType::InvoiceManipulation)
72            }
73            DocumentFlowAnomalyType::PriceMismatch => {
74                AnomalyType::Fraud(FraudType::InvoiceManipulation)
75            }
76            // Invoice without PO is a process issue (missing documentation/control bypass)
77            DocumentFlowAnomalyType::InvoiceWithoutPO => {
78                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
79            }
80            // Goods received but not billed could indicate asset misappropriation
81            DocumentFlowAnomalyType::GoodsReceivedNotBilled => {
82                AnomalyType::Fraud(FraudType::AssetMisappropriation)
83            }
84            // Payment without invoice is unauthorized approval
85            DocumentFlowAnomalyType::PaymentWithoutInvoice => {
86                AnomalyType::Fraud(FraudType::UnauthorizedApproval)
87            }
88            // Duplicate invoice is duplicate payment fraud
89            DocumentFlowAnomalyType::DuplicateInvoice => {
90                AnomalyType::Fraud(FraudType::DuplicatePayment)
91            }
92            // Invoice before receipt is process timing issue
93            DocumentFlowAnomalyType::InvoiceBeforeReceipt => {
94                AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
95            }
96            // Early payment bypasses normal approval
97            DocumentFlowAnomalyType::EarlyPayment => {
98                AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval)
99            }
100        };
101
102        LabeledAnomaly::new(
103            anomaly_id.to_string(),
104            anomaly_type,
105            document_id.to_string(),
106            "DocumentFlow".to_string(),
107            company_code.to_string(),
108            date,
109        )
110        .with_description(&self.description)
111    }
112}
113
114/// Configuration for document flow anomaly injection.
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct DocumentFlowAnomalyConfig {
117    /// Probability of quantity mismatch (0.0-1.0)
118    pub quantity_mismatch_rate: f64,
119    /// Probability of price mismatch
120    pub price_mismatch_rate: f64,
121    /// Probability of invoice without PO
122    pub maverick_buying_rate: f64,
123    /// Probability of GR without invoice
124    pub unbilled_receipt_rate: f64,
125    /// Probability of payment without invoice
126    pub unauthorized_payment_rate: f64,
127    /// Probability of duplicate invoice
128    pub duplicate_invoice_rate: f64,
129    /// Probability of invoice before receipt
130    pub early_invoice_rate: f64,
131    /// Probability of early payment
132    pub early_payment_rate: f64,
133    /// Maximum quantity variance percentage (e.g., 0.2 = 20%)
134    pub max_quantity_variance: f64,
135    /// Maximum price variance percentage
136    pub max_price_variance: f64,
137}
138
139impl Default for DocumentFlowAnomalyConfig {
140    fn default() -> Self {
141        Self {
142            quantity_mismatch_rate: 0.02,     // 2% of receipts
143            price_mismatch_rate: 0.015,       // 1.5% of invoices
144            maverick_buying_rate: 0.01,       // 1% maverick buying
145            unbilled_receipt_rate: 0.005,     // 0.5% unbilled
146            unauthorized_payment_rate: 0.002, // 0.2% unauthorized
147            duplicate_invoice_rate: 0.008,    // 0.8% duplicates
148            early_invoice_rate: 0.01,         // 1% early invoices
149            early_payment_rate: 0.005,        // 0.5% early payments
150            max_quantity_variance: 0.25,      // Up to 25% variance
151            max_price_variance: 0.15,         // Up to 15% variance
152        }
153    }
154}
155
156/// Injector for document flow anomalies.
157pub struct DocumentFlowAnomalyInjector {
158    config: DocumentFlowAnomalyConfig,
159    rng: ChaCha8Rng,
160    results: Vec<DocumentFlowAnomalyResult>,
161}
162
163impl DocumentFlowAnomalyInjector {
164    /// Create a new document flow anomaly injector.
165    pub fn new(config: DocumentFlowAnomalyConfig, seed: u64) -> Self {
166        Self {
167            config,
168            rng: seeded_rng(seed, 0),
169            results: Vec::new(),
170        }
171    }
172
173    /// Create with default configuration.
174    pub fn with_seed(seed: u64) -> Self {
175        Self::new(DocumentFlowAnomalyConfig::default(), seed)
176    }
177
178    /// Get the results of anomaly injection.
179    pub fn get_results(&self) -> &[DocumentFlowAnomalyResult] {
180        &self.results
181    }
182
183    /// Clear results.
184    pub fn clear_results(&mut self) {
185        self.results.clear();
186    }
187
188    /// Maybe inject a quantity mismatch into a goods receipt.
189    ///
190    /// Returns true if an anomaly was injected.
191    pub fn maybe_inject_quantity_mismatch(
192        &mut self,
193        gr: &mut GoodsReceipt,
194        po: &PurchaseOrder,
195    ) -> bool {
196        if self.rng.gen::<f64>() >= self.config.quantity_mismatch_rate {
197            return false;
198        }
199
200        // Find a matching item to modify
201        if let Some(gr_item) = gr.items.first_mut() {
202            let original_qty = gr_item.base.quantity;
203
204            // Generate variance (either over or under)
205            let variance = if self.rng.gen::<bool>() {
206                // Over-receipt (more common in fraud)
207                Decimal::from_f64_retain(
208                    1.0 + self.rng.gen::<f64>() * self.config.max_quantity_variance,
209                )
210                .unwrap_or(Decimal::ONE)
211            } else {
212                // Under-receipt
213                Decimal::from_f64_retain(
214                    1.0 - self.rng.gen::<f64>() * self.config.max_quantity_variance,
215                )
216                .unwrap_or(Decimal::ONE)
217            };
218
219            gr_item.base.quantity = (original_qty * variance).round_dp(2);
220
221            let result = DocumentFlowAnomalyResult {
222                anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
223                description: format!(
224                    "GR quantity {} doesn't match PO, expected based on PO line",
225                    gr_item.base.quantity
226                ),
227                original_value: Some(original_qty.to_string()),
228                modified_value: Some(gr_item.base.quantity.to_string()),
229                document_ids: vec![gr.header.document_id.clone(), po.header.document_id.clone()],
230                severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
231                {
232                    4
233                } else {
234                    3
235                },
236            };
237
238            self.results.push(result);
239            true
240        } else {
241            false
242        }
243    }
244
245    /// Maybe inject a price mismatch into a vendor invoice.
246    ///
247    /// Returns true if an anomaly was injected.
248    pub fn maybe_inject_price_mismatch(
249        &mut self,
250        invoice: &mut VendorInvoice,
251        po: &PurchaseOrder,
252    ) -> bool {
253        if self.rng.gen::<f64>() >= self.config.price_mismatch_rate {
254            return false;
255        }
256
257        // Find a matching item to modify
258        if let Some(inv_item) = invoice.items.first_mut() {
259            let original_price = inv_item.base.unit_price;
260
261            // Usually invoices are higher than PO (vendor overcharging)
262            let variance = if self.rng.gen::<f64>() < 0.8 {
263                // 80% chance of overcharge
264                Decimal::from_f64_retain(
265                    1.0 + self.rng.gen::<f64>() * self.config.max_price_variance,
266                )
267                .unwrap_or(Decimal::ONE)
268            } else {
269                // 20% chance of undercharge (rare, could be error)
270                Decimal::from_f64_retain(
271                    1.0 - self.rng.gen::<f64>() * self.config.max_price_variance * 0.5,
272                )
273                .unwrap_or(Decimal::ONE)
274            };
275
276            inv_item.base.unit_price = (original_price * variance).round_dp(2);
277
278            let result = DocumentFlowAnomalyResult {
279                anomaly_type: DocumentFlowAnomalyType::PriceMismatch,
280                description: format!(
281                    "Invoice price {} doesn't match PO agreed price",
282                    inv_item.base.unit_price
283                ),
284                original_value: Some(original_price.to_string()),
285                modified_value: Some(inv_item.base.unit_price.to_string()),
286                document_ids: vec![
287                    invoice.header.document_id.clone(),
288                    po.header.document_id.clone(),
289                ],
290                severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
291                {
292                    4
293                } else {
294                    3
295                },
296            };
297
298            self.results.push(result);
299            true
300        } else {
301            false
302        }
303    }
304
305    /// Create an invoice without PO reference (maverick buying).
306    ///
307    /// Removes the PO reference from an invoice to simulate maverick buying.
308    pub fn inject_maverick_buying(&mut self, invoice: &mut VendorInvoice) -> bool {
309        if self.rng.gen::<f64>() >= self.config.maverick_buying_rate {
310            return false;
311        }
312
313        // Only inject if there's a PO to remove
314        if invoice.purchase_order_id.is_none() {
315            return false;
316        }
317
318        let original_po = invoice.purchase_order_id.take();
319
320        let result = DocumentFlowAnomalyResult {
321            anomaly_type: DocumentFlowAnomalyType::InvoiceWithoutPO,
322            description: "Invoice submitted without purchase order (maverick buying)".to_string(),
323            original_value: original_po,
324            modified_value: None,
325            document_ids: vec![invoice.header.document_id.clone()],
326            severity: 4, // Significant control bypass
327        };
328
329        self.results.push(result);
330        true
331    }
332
333    /// Mark a goods receipt as having invoice timing anomaly.
334    ///
335    /// Returns a result indicating invoice came before goods receipt.
336    pub fn create_early_invoice_anomaly(
337        &mut self,
338        invoice: &VendorInvoice,
339        gr: &GoodsReceipt,
340    ) -> Option<DocumentFlowAnomalyResult> {
341        if self.rng.gen::<f64>() >= self.config.early_invoice_rate {
342            return None;
343        }
344
345        // Check if invoice date is before GR date
346        if invoice.invoice_date < gr.header.document_date {
347            let result = DocumentFlowAnomalyResult {
348                anomaly_type: DocumentFlowAnomalyType::InvoiceBeforeReceipt,
349                description: format!(
350                    "Invoice dated {} before goods receipt dated {}",
351                    invoice.invoice_date, gr.header.document_date
352                ),
353                original_value: Some(gr.header.document_date.to_string()),
354                modified_value: Some(invoice.invoice_date.to_string()),
355                document_ids: vec![
356                    invoice.header.document_id.clone(),
357                    gr.header.document_id.clone(),
358                ],
359                severity: 3,
360            };
361
362            self.results.push(result.clone());
363            return Some(result);
364        }
365
366        None
367    }
368
369    /// Check for potential unauthorized payment (payment without proper invoice).
370    pub fn check_unauthorized_payment(
371        &mut self,
372        payment: &Payment,
373        has_valid_invoice: bool,
374    ) -> Option<DocumentFlowAnomalyResult> {
375        if has_valid_invoice {
376            return None;
377        }
378
379        if self.rng.gen::<f64>() >= self.config.unauthorized_payment_rate {
380            return None;
381        }
382
383        let result = DocumentFlowAnomalyResult {
384            anomaly_type: DocumentFlowAnomalyType::PaymentWithoutInvoice,
385            description: "Payment issued without valid approved invoice".to_string(),
386            original_value: None,
387            modified_value: None,
388            document_ids: vec![payment.header.document_id.clone()],
389            severity: 5, // Critical - potential fraud
390        };
391
392        self.results.push(result.clone());
393        Some(result)
394    }
395
396    /// Get statistics about injected anomalies.
397    pub fn get_statistics(&self) -> DocumentFlowAnomalyStats {
398        let mut stats = DocumentFlowAnomalyStats::default();
399
400        for result in &self.results {
401            match result.anomaly_type {
402                DocumentFlowAnomalyType::QuantityMismatch => stats.quantity_mismatches += 1,
403                DocumentFlowAnomalyType::PriceMismatch => stats.price_mismatches += 1,
404                DocumentFlowAnomalyType::InvoiceWithoutPO => stats.maverick_buying += 1,
405                DocumentFlowAnomalyType::GoodsReceivedNotBilled => stats.unbilled_receipts += 1,
406                DocumentFlowAnomalyType::PaymentWithoutInvoice => stats.unauthorized_payments += 1,
407                DocumentFlowAnomalyType::DuplicateInvoice => stats.duplicate_invoices += 1,
408                DocumentFlowAnomalyType::InvoiceBeforeReceipt => stats.early_invoices += 1,
409                DocumentFlowAnomalyType::EarlyPayment => stats.early_payments += 1,
410            }
411        }
412
413        stats.total = self.results.len();
414        stats
415    }
416}
417
/// Per-type counts of recorded document flow anomalies.
///
/// The default value has every counter at zero.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowAnomalyStats {
    /// Number of anomalies of any type.
    pub total: usize,
    /// Number of quantity mismatches.
    pub quantity_mismatches: usize,
    /// Number of price mismatches.
    pub price_mismatches: usize,
    /// Number of invoices without a purchase order.
    pub maverick_buying: usize,
    /// Number of goods receipts that were never billed.
    pub unbilled_receipts: usize,
    /// Number of payments without an invoice.
    pub unauthorized_payments: usize,
    /// Number of duplicate invoices.
    pub duplicate_invoices: usize,
    /// Number of invoices dated before their goods receipt.
    pub early_invoices: usize,
    /// Number of early payments.
    pub early_payments: usize,
}
431
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::documents::{
        GoodsReceiptItem, PurchaseOrderItem, VendorInvoiceItem,
    };
    use rust_decimal_macros::dec;

    /// Seed shared by every test so the RNG draws are reproducible.
    const SEED: u64 = 42;

    /// Shorthand for a date in January 2024.
    fn jan_2024(day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, day).unwrap()
    }

    /// Purchase order with one line: 100 units at 10.00.
    fn create_test_po() -> PurchaseOrder {
        let mut order = PurchaseOrder::new(
            "PO-001",
            "1000",
            "VEND001",
            2024,
            1,
            jan_2024(15),
            "USER001",
        );
        let line = PurchaseOrderItem::new(1, "Test Item", dec!(100), dec!(10.00));
        order.add_item(line);
        order
    }

    /// Goods receipt with one line: 100 units at 10.00.
    fn create_test_gr(_po_id: &str) -> GoodsReceipt {
        let mut receipt = GoodsReceipt::new(
            "GR-001",
            "1000",
            "PLANT01",
            "STOR01",
            2024,
            1,
            jan_2024(20),
            "USER001",
        );
        let line = GoodsReceiptItem::new(1, "Test Item", dec!(100), dec!(10.00));
        receipt.add_item(line);
        receipt
    }

    /// Vendor invoice with one line (100 units at 10.00) and an optional PO
    /// reference.
    fn create_test_invoice(po_id: Option<&str>) -> VendorInvoice {
        let mut invoice = VendorInvoice::new(
            "VI-001",
            "1000",
            "VEND001",
            "INV-001",
            2024,
            1,
            jan_2024(25),
            "USER001",
        );
        invoice.purchase_order_id = po_id.map(|s| s.to_string());
        let line = VendorInvoiceItem::new(1, "Test Item", dec!(100), dec!(10.00));
        invoice.add_item(line);
        invoice
    }

    #[test]
    fn test_quantity_mismatch_injection() {
        // A rate of 1.0 makes the injection certain.
        let always_mismatch = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(always_mismatch, SEED);

        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        let qty_before = gr.items[0].base.quantity;

        let injected = injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        assert!(injected);
        assert_ne!(gr.items[0].base.quantity, qty_before);

        let recorded = injector.get_results();
        assert_eq!(recorded.len(), 1);
        assert_eq!(
            recorded[0].anomaly_type,
            DocumentFlowAnomalyType::QuantityMismatch
        );
    }

    #[test]
    fn test_maverick_buying_injection() {
        // A rate of 1.0 makes the injection certain.
        let always_maverick = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(always_maverick, SEED);

        let mut invoice = create_test_invoice(Some("PO-001"));
        assert!(invoice.purchase_order_id.is_some());

        let injected = injector.inject_maverick_buying(&mut invoice);

        assert!(injected);
        assert!(invoice.purchase_order_id.is_none());
        assert_eq!(
            injector.get_results()[0].anomaly_type,
            DocumentFlowAnomalyType::InvoiceWithoutPO
        );
    }

    #[test]
    fn test_statistics() {
        let always_both = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0,
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(always_both, SEED);

        // First anomaly: quantity mismatch on the goods receipt.
        let po = create_test_po();
        let mut gr = create_test_gr(&po.header.document_id);
        injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        // Second anomaly: maverick buying on the invoice.
        let mut invoice = create_test_invoice(Some("PO-001"));
        injector.inject_maverick_buying(&mut invoice);

        let stats = injector.get_statistics();
        assert_eq!(stats.total, 2);
        assert_eq!(stats.quantity_mismatches, 1);
        assert_eq!(stats.maverick_buying, 1);
    }

    #[test]
    fn test_labeled_anomaly_conversion() {
        let result = DocumentFlowAnomalyResult {
            anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
            description: "Test mismatch".to_string(),
            original_value: Some("100".to_string()),
            modified_value: Some("120".to_string()),
            document_ids: vec!["DOC-001".to_string()],
            severity: 3,
        };

        let labeled = result.to_labeled_anomaly("ANO-001", "DOC-001", "1000", jan_2024(15));

        assert_eq!(labeled.document_id, "DOC-001");
        assert_eq!(labeled.company_code, "1000");
        // A quantity mismatch is labelled as invoice manipulation.
        assert!(matches!(
            labeled.anomaly_type,
            AnomalyType::Fraud(FraudType::InvoiceManipulation)
        ));
    }
}