use chrono::NaiveDate;
use datasynth_core::utils::seeded_rng;
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use rust_decimal::Decimal;
use serde::{Deserialize, Serialize};
use datasynth_core::models::documents::{GoodsReceipt, Payment, PurchaseOrder, VendorInvoice};
use datasynth_core::{AnomalyType, FraudType, LabeledAnomaly, ProcessIssueType};
/// Categories of document-flow anomaly that can be recorded in a
/// `DocumentFlowAnomalyResult`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DocumentFlowAnomalyType {
/// The quantity on a goods-receipt line was changed from its original value.
QuantityMismatch,
/// The unit price on a vendor-invoice line was changed from its original value.
PriceMismatch,
/// A vendor invoice had its purchase-order reference removed ("maverick buying").
InvoiceWithoutPO,
/// NOTE(review): presumably a goods receipt that was never invoiced; no method
/// in this file produces this variant, it is only mapped and counted.
GoodsReceivedNotBilled,
/// A payment was issued although the caller reported no valid invoice for it.
PaymentWithoutInvoice,
/// NOTE(review): presumably the same invoice submitted more than once; no
/// method in this file produces this variant, it is only mapped and counted.
DuplicateInvoice,
/// The invoice date is earlier than the goods-receipt document date.
InvoiceBeforeReceipt,
/// NOTE(review): presumably a payment made ahead of schedule; no method in
/// this file produces this variant, it is only mapped and counted.
EarlyPayment,
}
/// Record of a single anomaly that was injected into, or detected on, a set of
/// documents.
#[derive(Debug, Clone)]
pub struct DocumentFlowAnomalyResult {
/// Category of the anomaly.
pub anomaly_type: DocumentFlowAnomalyType,
/// Human-readable explanation of the anomaly.
pub description: String,
/// String form of the value before the anomaly, when one exists.
pub original_value: Option<String>,
/// String form of the value after the anomaly, when one exists.
pub modified_value: Option<String>,
/// Identifiers of the documents involved in the anomaly.
pub document_ids: Vec<String>,
/// Severity score; the methods in this file assign values from 3 to 5.
/// NOTE(review): the overall scale is not defined here — confirm with callers.
pub severity: u8,
}
impl DocumentFlowAnomalyResult {
pub fn to_labeled_anomaly(
&self,
anomaly_id: &str,
document_id: &str,
company_code: &str,
date: NaiveDate,
) -> LabeledAnomaly {
let anomaly_type = match self.anomaly_type {
DocumentFlowAnomalyType::QuantityMismatch => {
AnomalyType::Fraud(FraudType::InvoiceManipulation)
}
DocumentFlowAnomalyType::PriceMismatch => {
AnomalyType::Fraud(FraudType::InvoiceManipulation)
}
DocumentFlowAnomalyType::InvoiceWithoutPO => {
AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
}
DocumentFlowAnomalyType::GoodsReceivedNotBilled => {
AnomalyType::Fraud(FraudType::AssetMisappropriation)
}
DocumentFlowAnomalyType::PaymentWithoutInvoice => {
AnomalyType::Fraud(FraudType::UnauthorizedApproval)
}
DocumentFlowAnomalyType::DuplicateInvoice => {
AnomalyType::Fraud(FraudType::DuplicatePayment)
}
DocumentFlowAnomalyType::InvoiceBeforeReceipt => {
AnomalyType::ProcessIssue(ProcessIssueType::MissingDocumentation)
}
DocumentFlowAnomalyType::EarlyPayment => {
AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval)
}
};
LabeledAnomaly::new(
anomaly_id.to_string(),
anomaly_type,
document_id.to_string(),
"DocumentFlow".to_string(),
company_code.to_string(),
date,
)
.with_description(&self.description)
}
}
/// Tunable rates and variance bounds for document-flow anomaly injection.
///
/// Each `*_rate` that the injector reads is compared against a random `f64`
/// draw: the anomaly path is taken only when the draw is below the rate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentFlowAnomalyConfig {
/// Rate for altering a goods-receipt quantity.
pub quantity_mismatch_rate: f64,
/// Rate for altering a vendor-invoice unit price.
pub price_mismatch_rate: f64,
/// Rate for removing the purchase-order reference from an invoice.
pub maverick_buying_rate: f64,
/// NOTE(review): presumably the rate for unbilled goods receipts; no injector
/// method in this file reads it.
pub unbilled_receipt_rate: f64,
/// Rate for flagging a payment that has no valid invoice.
pub unauthorized_payment_rate: f64,
/// NOTE(review): presumably the rate for duplicate invoices; no injector
/// method in this file reads it.
pub duplicate_invoice_rate: f64,
/// Rate for flagging an invoice dated before its goods receipt.
pub early_invoice_rate: f64,
/// NOTE(review): presumably the rate for early payments; no injector method
/// in this file reads it.
pub early_payment_rate: f64,
/// Largest fractional change (up or down) applied to a quantity.
pub max_quantity_variance: f64,
/// Largest fractional increase applied to a price; decreases use half of it.
pub max_price_variance: f64,
}
impl Default for DocumentFlowAnomalyConfig {
    /// Returns the built-in configuration: every rate is 2% or lower, with a
    /// 25% quantity variance bound and a 15% price variance bound.
    fn default() -> Self {
        Self {
            // Injection / detection rates.
            quantity_mismatch_rate: 0.02,
            price_mismatch_rate: 0.015,
            maverick_buying_rate: 0.01,
            unbilled_receipt_rate: 0.005,
            unauthorized_payment_rate: 0.002,
            duplicate_invoice_rate: 0.008,
            early_invoice_rate: 0.01,
            early_payment_rate: 0.005,
            // Variance bounds, expressed as fractions of the original value.
            max_quantity_variance: 0.25,
            max_price_variance: 0.15,
        }
    }
}
/// Injects and detects document-flow anomalies, and keeps a record of every
/// anomaly it produced.
pub struct DocumentFlowAnomalyInjector {
// Rates and variance bounds consulted by the injection methods.
config: DocumentFlowAnomalyConfig,
// Random source for all rate checks and variance draws; created from the seed
// passed to `new`.
rng: ChaCha8Rng,
// Anomalies recorded so far, in the order they were produced.
results: Vec<DocumentFlowAnomalyResult>,
}
impl DocumentFlowAnomalyInjector {
pub fn new(config: DocumentFlowAnomalyConfig, seed: u64) -> Self {
Self {
config,
rng: seeded_rng(seed, 0),
results: Vec::new(),
}
}
pub fn with_seed(seed: u64) -> Self {
Self::new(DocumentFlowAnomalyConfig::default(), seed)
}
pub fn get_results(&self) -> &[DocumentFlowAnomalyResult] {
&self.results
}
pub fn clear_results(&mut self) {
self.results.clear();
}
pub fn maybe_inject_quantity_mismatch(
&mut self,
gr: &mut GoodsReceipt,
po: &PurchaseOrder,
) -> bool {
if self.rng.random::<f64>() >= self.config.quantity_mismatch_rate {
return false;
}
if let Some(gr_item) = gr.items.first_mut() {
let original_qty = gr_item.base.quantity;
let variance = if self.rng.random::<bool>() {
Decimal::from_f64_retain(
1.0 + self.rng.random::<f64>() * self.config.max_quantity_variance,
)
.unwrap_or(Decimal::ONE)
} else {
Decimal::from_f64_retain(
1.0 - self.rng.random::<f64>() * self.config.max_quantity_variance,
)
.unwrap_or(Decimal::ONE)
};
gr_item.base.quantity = (original_qty * variance).round_dp(2);
let result = DocumentFlowAnomalyResult {
anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
description: format!(
"GR quantity {} doesn't match PO, expected based on PO line",
gr_item.base.quantity
),
original_value: Some(original_qty.to_string()),
modified_value: Some(gr_item.base.quantity.to_string()),
document_ids: vec![gr.header.document_id.clone(), po.header.document_id.clone()],
severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
{
4
} else {
3
},
};
self.results.push(result);
true
} else {
false
}
}
pub fn maybe_inject_price_mismatch(
&mut self,
invoice: &mut VendorInvoice,
po: &PurchaseOrder,
) -> bool {
if self.rng.random::<f64>() >= self.config.price_mismatch_rate {
return false;
}
if let Some(inv_item) = invoice.items.first_mut() {
let original_price = inv_item.base.unit_price;
let variance = if self.rng.random::<f64>() < 0.8 {
Decimal::from_f64_retain(
1.0 + self.rng.random::<f64>() * self.config.max_price_variance,
)
.unwrap_or(Decimal::ONE)
} else {
Decimal::from_f64_retain(
1.0 - self.rng.random::<f64>() * self.config.max_price_variance * 0.5,
)
.unwrap_or(Decimal::ONE)
};
inv_item.base.unit_price = (original_price * variance).round_dp(2);
let result = DocumentFlowAnomalyResult {
anomaly_type: DocumentFlowAnomalyType::PriceMismatch,
description: format!(
"Invoice price {} doesn't match PO agreed price",
inv_item.base.unit_price
),
original_value: Some(original_price.to_string()),
modified_value: Some(inv_item.base.unit_price.to_string()),
document_ids: vec![
invoice.header.document_id.clone(),
po.header.document_id.clone(),
],
severity: if variance > Decimal::from_f64_retain(1.1).expect("valid f64 to decimal")
{
4
} else {
3
},
};
self.results.push(result);
true
} else {
false
}
}
pub fn inject_maverick_buying(&mut self, invoice: &mut VendorInvoice) -> bool {
if self.rng.random::<f64>() >= self.config.maverick_buying_rate {
return false;
}
if invoice.purchase_order_id.is_none() {
return false;
}
let original_po = invoice.purchase_order_id.take();
let result = DocumentFlowAnomalyResult {
anomaly_type: DocumentFlowAnomalyType::InvoiceWithoutPO,
description: "Invoice submitted without purchase order (maverick buying)".to_string(),
original_value: original_po,
modified_value: None,
document_ids: vec![invoice.header.document_id.clone()],
severity: 4, };
self.results.push(result);
true
}
pub fn create_early_invoice_anomaly(
&mut self,
invoice: &VendorInvoice,
gr: &GoodsReceipt,
) -> Option<DocumentFlowAnomalyResult> {
if self.rng.random::<f64>() >= self.config.early_invoice_rate {
return None;
}
if invoice.invoice_date < gr.header.document_date {
let result = DocumentFlowAnomalyResult {
anomaly_type: DocumentFlowAnomalyType::InvoiceBeforeReceipt,
description: format!(
"Invoice dated {} before goods receipt dated {}",
invoice.invoice_date, gr.header.document_date
),
original_value: Some(gr.header.document_date.to_string()),
modified_value: Some(invoice.invoice_date.to_string()),
document_ids: vec![
invoice.header.document_id.clone(),
gr.header.document_id.clone(),
],
severity: 3,
};
self.results.push(result.clone());
return Some(result);
}
None
}
pub fn check_unauthorized_payment(
&mut self,
payment: &Payment,
has_valid_invoice: bool,
) -> Option<DocumentFlowAnomalyResult> {
if has_valid_invoice {
return None;
}
if self.rng.random::<f64>() >= self.config.unauthorized_payment_rate {
return None;
}
let result = DocumentFlowAnomalyResult {
anomaly_type: DocumentFlowAnomalyType::PaymentWithoutInvoice,
description: "Payment issued without valid approved invoice".to_string(),
original_value: None,
modified_value: None,
document_ids: vec![payment.header.document_id.clone()],
severity: 5, };
self.results.push(result.clone());
Some(result)
}
pub fn get_statistics(&self) -> DocumentFlowAnomalyStats {
let mut stats = DocumentFlowAnomalyStats::default();
for result in &self.results {
match result.anomaly_type {
DocumentFlowAnomalyType::QuantityMismatch => stats.quantity_mismatches += 1,
DocumentFlowAnomalyType::PriceMismatch => stats.price_mismatches += 1,
DocumentFlowAnomalyType::InvoiceWithoutPO => stats.maverick_buying += 1,
DocumentFlowAnomalyType::GoodsReceivedNotBilled => stats.unbilled_receipts += 1,
DocumentFlowAnomalyType::PaymentWithoutInvoice => stats.unauthorized_payments += 1,
DocumentFlowAnomalyType::DuplicateInvoice => stats.duplicate_invoices += 1,
DocumentFlowAnomalyType::InvoiceBeforeReceipt => stats.early_invoices += 1,
DocumentFlowAnomalyType::EarlyPayment => stats.early_payments += 1,
}
}
stats.total = self.results.len();
stats
}
}
/// Per-type counts of the anomalies recorded by a `DocumentFlowAnomalyInjector`.
#[derive(Debug, Clone, Default)]
pub struct DocumentFlowAnomalyStats {
/// Number of recorded anomalies of any type.
pub total: usize,
/// Count of `QuantityMismatch` anomalies.
pub quantity_mismatches: usize,
/// Count of `PriceMismatch` anomalies.
pub price_mismatches: usize,
/// Count of `InvoiceWithoutPO` anomalies.
pub maverick_buying: usize,
/// Count of `GoodsReceivedNotBilled` anomalies.
pub unbilled_receipts: usize,
/// Count of `PaymentWithoutInvoice` anomalies.
pub unauthorized_payments: usize,
/// Count of `DuplicateInvoice` anomalies.
pub duplicate_invoices: usize,
/// Count of `InvoiceBeforeReceipt` anomalies.
pub early_invoices: usize,
/// Count of `EarlyPayment` anomalies.
pub early_payments: usize,
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::documents::{
        GoodsReceiptItem, PurchaseOrderItem, VendorInvoiceItem,
    };
    use rust_decimal_macros::dec;

    /// Purchase order with a single line: 100 units at 10.00.
    fn create_test_po() -> PurchaseOrder {
        let mut po = PurchaseOrder::new(
            "PO-001",
            "1000",
            "VEND001",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
            "USER001",
        );
        po.add_item(PurchaseOrderItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        po
    }

    /// Goods receipt with a single line: 100 units at 10.00.
    fn create_test_gr() -> GoodsReceipt {
        let mut gr = GoodsReceipt::new(
            "GR-001",
            "1000",
            "PLANT01",
            "STOR01",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 20).unwrap(),
            "USER001",
        );
        gr.add_item(GoodsReceiptItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        gr
    }

    /// Vendor invoice with a single line (100 units at 10.00) and an optional
    /// purchase-order reference.
    fn create_test_invoice(po_id: Option<&str>) -> VendorInvoice {
        let mut inv = VendorInvoice::new(
            "VI-001",
            "1000",
            "VEND001",
            "INV-001",
            2024,
            1,
            NaiveDate::from_ymd_opt(2024, 1, 25).unwrap(),
            "USER001",
        );
        inv.purchase_order_id = po_id.map(|s| s.to_string());
        inv.add_item(VendorInvoiceItem::new(
            1,
            "Test Item",
            dec!(100),
            dec!(10.00),
        ));
        inv
    }

    #[test]
    fn test_quantity_mismatch_injection() {
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut gr = create_test_gr();
        let original_qty = gr.items[0].base.quantity;

        let injected = injector.maybe_inject_quantity_mismatch(&mut gr, &po);

        assert!(injected);
        assert_ne!(gr.items[0].base.quantity, original_qty);
        assert_eq!(injector.get_results().len(), 1);
        assert_eq!(
            injector.get_results()[0].anomaly_type,
            DocumentFlowAnomalyType::QuantityMismatch
        );
    }

    #[test]
    fn test_price_mismatch_injection() {
        let config = DocumentFlowAnomalyConfig {
            price_mismatch_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut invoice = create_test_invoice(Some("PO-001"));
        let original_price = invoice.items[0].base.unit_price;

        let injected = injector.maybe_inject_price_mismatch(&mut invoice, &po);

        assert!(injected);
        let results = injector.get_results();
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].anomaly_type,
            DocumentFlowAnomalyType::PriceMismatch
        );
        // The record must describe exactly what happened to the document.
        assert_eq!(results[0].original_value, Some(original_price.to_string()));
        assert_eq!(
            results[0].modified_value,
            Some(invoice.items[0].base.unit_price.to_string())
        );
        assert_eq!(
            results[0].document_ids,
            vec![
                invoice.header.document_id.clone(),
                po.header.document_id.clone()
            ]
        );
    }

    #[test]
    fn test_maverick_buying_injection() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(Some("PO-001"));
        assert!(invoice.purchase_order_id.is_some());

        let injected = injector.inject_maverick_buying(&mut invoice);

        assert!(injected);
        assert!(invoice.purchase_order_id.is_none());
        assert_eq!(
            injector.get_results()[0].anomaly_type,
            DocumentFlowAnomalyType::InvoiceWithoutPO
        );
    }

    #[test]
    fn test_maverick_buying_skips_invoice_without_po() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(None);

        // There is no PO reference to remove, so nothing can be injected.
        assert!(!injector.inject_maverick_buying(&mut invoice));
        assert!(invoice.purchase_order_id.is_none());
        assert!(injector.get_results().is_empty());
    }

    #[test]
    fn test_zero_rate_never_injects() {
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 0.0,
            price_mismatch_rate: 0.0,
            maverick_buying_rate: 0.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut gr = create_test_gr();
        let mut invoice = create_test_invoice(Some("PO-001"));
        let original_qty = gr.items[0].base.quantity;
        let original_price = invoice.items[0].base.unit_price;

        // A random draw is never below 0.0, so every call must be a no-op.
        assert!(!injector.maybe_inject_quantity_mismatch(&mut gr, &po));
        assert!(!injector.maybe_inject_price_mismatch(&mut invoice, &po));
        assert!(!injector.inject_maverick_buying(&mut invoice));

        assert_eq!(gr.items[0].base.quantity, original_qty);
        assert_eq!(invoice.items[0].base.unit_price, original_price);
        assert_eq!(invoice.purchase_order_id.as_deref(), Some("PO-001"));
        assert!(injector.get_results().is_empty());
        assert_eq!(injector.get_statistics().total, 0);
    }

    #[test]
    fn test_statistics() {
        let config = DocumentFlowAnomalyConfig {
            quantity_mismatch_rate: 1.0,
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let po = create_test_po();
        let mut gr = create_test_gr();
        injector.maybe_inject_quantity_mismatch(&mut gr, &po);
        let mut invoice = create_test_invoice(Some("PO-001"));
        injector.inject_maverick_buying(&mut invoice);

        let stats = injector.get_statistics();

        assert_eq!(stats.total, 2);
        assert_eq!(stats.quantity_mismatches, 1);
        assert_eq!(stats.maverick_buying, 1);
    }

    #[test]
    fn test_clear_results() {
        let config = DocumentFlowAnomalyConfig {
            maverick_buying_rate: 1.0,
            ..Default::default()
        };
        let mut injector = DocumentFlowAnomalyInjector::new(config, 42);
        let mut invoice = create_test_invoice(Some("PO-001"));
        assert!(injector.inject_maverick_buying(&mut invoice));
        assert_eq!(injector.get_results().len(), 1);

        injector.clear_results();

        assert!(injector.get_results().is_empty());
        assert_eq!(injector.get_statistics().total, 0);
    }

    #[test]
    fn test_labeled_anomaly_conversion() {
        let result = DocumentFlowAnomalyResult {
            anomaly_type: DocumentFlowAnomalyType::QuantityMismatch,
            description: "Test mismatch".to_string(),
            original_value: Some("100".to_string()),
            modified_value: Some("120".to_string()),
            document_ids: vec!["DOC-001".to_string()],
            severity: 3,
        };

        let labeled = result.to_labeled_anomaly(
            "ANO-001",
            "DOC-001",
            "1000",
            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
        );

        assert_eq!(labeled.document_id, "DOC-001");
        assert_eq!(labeled.company_code, "1000");
        assert!(matches!(
            labeled.anomaly_type,
            AnomalyType::Fraud(FraudType::InvoiceManipulation)
        ));
    }
}