Skip to main content

datasynth_eval/banking/
cross_layer_coherence.rs

1//! Cross-layer coherence evaluator.
2//!
3//! Validates that banking transactions linked to document-flow Payments
4//! maintain referential integrity and label consistency:
5//! - Every bank transaction with `source_payment_id` must reference an existing Payment
6//! - Fraud labels must propagate: `Payment.is_fraud → BankTransaction.is_suspicious`
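//! - Mirror transactions (those with a `parent_transaction_id`) should have consistent
//!   amounts and inverse directions (they are currently only counted, not validated)
//! - GL cash accounts must be present on bridged transactions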
9
10use std::collections::{HashMap, HashSet};
11
12use serde::{Deserialize, Serialize};
13
14use crate::error::EvalResult;
15
/// Minimal view of a document-flow Payment used for cross-layer validation.
///
/// Only the attributes needed to compare a payment against the bank
/// transactions that reference it are carried here.
#[derive(Debug, Clone)]
pub struct PaymentRef {
    /// Identifier that bank transactions point at through `source_payment_id`.
    pub payment_id: String,
    /// Payment amount; compared against the amount of each bridged bank transaction.
    pub amount: f64,
    /// Whether the payment is labelled as fraud; expected to propagate to
    /// `is_suspicious` on a linked bank transaction.
    pub is_fraud: bool,
    /// Journal entry linked to the payment, if any.
    /// NOTE(review): not read by the analyzer in this file.
    pub journal_entry_id: Option<String>,
}
24
/// Minimal view of a BankTransaction carrying only its cross-layer links
/// and the labels needed to validate them.
#[derive(Debug, Clone)]
pub struct BankTxnLinks {
    /// Identifier of the bank transaction itself.
    pub transaction_id: String,
    /// Payment this transaction was derived from; a transaction with this
    /// set is treated as "bridged".
    pub source_payment_id: Option<String>,
    /// Invoice this transaction is linked to, if any.
    /// NOTE(review): not read by the analyzer in this file.
    pub source_invoice_id: Option<String>,
    /// Journal entry this transaction is linked to, if any.
    /// NOTE(review): not read by the analyzer in this file.
    pub journal_entry_id: Option<String>,
    /// GL cash account of the transaction; `None` on a bridged transaction
    /// is counted as a missing GL account.
    pub gl_cash_account: Option<String>,
    /// Suspicious label; expected to be set when the source payment is fraudulent.
    pub is_suspicious: bool,
    /// Direction flag of the transaction.
    /// NOTE(review): not read by the analyzer in this file.
    pub is_outbound: bool,
    /// Transaction amount; compared against the source payment's amount.
    pub amount: f64,
    /// Parent transaction id; when present the transaction is counted as a mirror.
    pub parent_transaction_id: Option<String>,
}
38
39/// Thresholds for cross-layer coherence.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct CrossLayerThresholds {
42    /// Maximum fraction of bridged txns with dangling payment references
43    pub max_dangling_payment_rate: f64,
44    /// Minimum fraction of Payment.is_fraud cases propagated to BankTransaction.is_suspicious
45    pub min_fraud_propagation_rate: f64,
46    /// Maximum fraction of bridged txns missing gl_cash_account
47    pub max_missing_gl_rate: f64,
48    /// Maximum amount deviation allowed between payment and bridged txn
49    pub max_amount_deviation: f64,
50}
51
52impl Default for CrossLayerThresholds {
53    fn default() -> Self {
54        Self {
55            max_dangling_payment_rate: 0.0,
56            min_fraud_propagation_rate: 0.95,
57            max_missing_gl_rate: 0.01,
58            max_amount_deviation: 0.01,
59        }
60    }
61}
62
63/// Cross-layer coherence analysis result.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct CrossLayerCoherenceAnalysis {
66    /// Total bank transactions examined
67    pub total_bank_transactions: usize,
68    /// Bank transactions with source_payment_id populated (bridged)
69    pub bridged_transactions: usize,
70    /// Bridged transactions whose payment_id doesn't exist in payments
71    pub dangling_payment_refs: usize,
72    /// Payment IDs that are fraudulent but had no suspicious bank txn
73    pub unpropagated_fraud_payments: usize,
74    /// Total fraudulent payments observed
75    pub total_fraud_payments: usize,
76    /// Bridged transactions missing gl_cash_account
77    pub missing_gl_account: usize,
78    /// Bridged transactions with amount deviation > threshold
79    pub amount_mismatches: usize,
80    /// Mirror transactions (with parent_transaction_id)
81    pub mirror_transactions: usize,
82    /// Fraud propagation rate
83    pub fraud_propagation_rate: f64,
84    /// Overall pass/fail
85    pub passes: bool,
86    pub issues: Vec<String>,
87}
88
89/// Cross-layer coherence analyzer.
90pub struct CrossLayerCoherenceAnalyzer {
91    pub thresholds: CrossLayerThresholds,
92}
93
94impl CrossLayerCoherenceAnalyzer {
95    pub fn new() -> Self {
96        Self {
97            thresholds: CrossLayerThresholds::default(),
98        }
99    }
100
101    pub fn with_thresholds(thresholds: CrossLayerThresholds) -> Self {
102        Self { thresholds }
103    }
104
105    /// Analyze cross-layer coherence between payments and bank transactions.
106    pub fn analyze(
107        &self,
108        payments: &[PaymentRef],
109        bank_txns: &[BankTxnLinks],
110    ) -> EvalResult<CrossLayerCoherenceAnalysis> {
111        let payment_by_id: HashMap<&str, &PaymentRef> = payments
112            .iter()
113            .map(|p| (p.payment_id.as_str(), p))
114            .collect();
115        let total_fraud_payments = payments.iter().filter(|p| p.is_fraud).count();
116
117        let mut bridged_count = 0usize;
118        let mut dangling = 0usize;
119        let mut missing_gl = 0usize;
120        let mut mismatches = 0usize;
121        let mut mirror_count = 0usize;
122        // Track which fraud payments had at least one suspicious bank txn
123        let mut fraud_payments_with_suspicious_txn: HashSet<&str> = HashSet::new();
124
125        for txn in bank_txns {
126            if txn.parent_transaction_id.is_some() {
127                mirror_count += 1;
128            }
129            let Some(ref pid) = txn.source_payment_id else {
130                continue;
131            };
132            bridged_count += 1;
133
134            match payment_by_id.get(pid.as_str()) {
135                None => {
136                    dangling += 1;
137                }
138                Some(payment) => {
139                    // Amount match (bridged should have same amount as payment)
140                    let deviation =
141                        (payment.amount - txn.amount).abs() / payment.amount.abs().max(1.0);
142                    if deviation > self.thresholds.max_amount_deviation {
143                        mismatches += 1;
144                    }
145                    // Track fraud propagation
146                    if payment.is_fraud && txn.is_suspicious {
147                        fraud_payments_with_suspicious_txn.insert(pid.as_str());
148                    }
149                }
150            }
151
152            if txn.gl_cash_account.is_none() {
153                missing_gl += 1;
154            }
155        }
156
157        let unpropagated_fraud_payments =
158            total_fraud_payments.saturating_sub(fraud_payments_with_suspicious_txn.len());
159
160        let fraud_propagation_rate = if total_fraud_payments > 0 {
161            fraud_payments_with_suspicious_txn.len() as f64 / total_fraud_payments as f64
162        } else {
163            1.0
164        };
165
166        let dangling_rate = if bridged_count > 0 {
167            dangling as f64 / bridged_count as f64
168        } else {
169            0.0
170        };
171        let missing_gl_rate = if bridged_count > 0 {
172            missing_gl as f64 / bridged_count as f64
173        } else {
174            0.0
175        };
176
177        let mut issues = Vec::new();
178        if dangling_rate > self.thresholds.max_dangling_payment_rate {
179            issues.push(format!(
180                "{dangling} bridged bank transactions reference non-existent payments ({:.2}%)",
181                dangling_rate * 100.0
182            ));
183        }
184        if total_fraud_payments > 0
185            && fraud_propagation_rate < self.thresholds.min_fraud_propagation_rate
186        {
187            issues.push(format!(
188                "Fraud propagation rate {:.1}% below minimum {:.1}% ({} of {} fraud payments had no suspicious bank txn)",
189                fraud_propagation_rate * 100.0,
190                self.thresholds.min_fraud_propagation_rate * 100.0,
191                unpropagated_fraud_payments,
192                total_fraud_payments,
193            ));
194        }
195        if missing_gl_rate > self.thresholds.max_missing_gl_rate {
196            issues.push(format!(
197                "{missing_gl} bridged transactions missing gl_cash_account ({:.2}%)",
198                missing_gl_rate * 100.0
199            ));
200        }
201        if mismatches > 0 {
202            issues.push(format!(
203                "{mismatches} bridged transactions have amount deviation > {:.2}% from their payment",
204                self.thresholds.max_amount_deviation * 100.0
205            ));
206        }
207
208        Ok(CrossLayerCoherenceAnalysis {
209            total_bank_transactions: bank_txns.len(),
210            bridged_transactions: bridged_count,
211            dangling_payment_refs: dangling,
212            unpropagated_fraud_payments,
213            total_fraud_payments,
214            missing_gl_account: missing_gl,
215            amount_mismatches: mismatches,
216            mirror_transactions: mirror_count,
217            fraud_propagation_rate,
218            passes: issues.is_empty(),
219            issues,
220        })
221    }
222}
223
224impl Default for CrossLayerCoherenceAnalyzer {
225    fn default() -> Self {
226        Self::new()
227    }
228}
229
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a payment fixture.
    fn payment(id: &str, amount: f64, is_fraud: bool, journal_entry: Option<&str>) -> PaymentRef {
        PaymentRef {
            payment_id: id.into(),
            amount,
            is_fraud,
            journal_entry_id: journal_entry.map(String::from),
        }
    }

    /// Builds an outbound, non-mirror bank transaction bridged to `payment_id`,
    /// with no invoice link and GL cash account "100000".
    fn bridged_txn(
        id: &str,
        payment_id: &str,
        amount: f64,
        is_suspicious: bool,
        journal_entry: Option<&str>,
    ) -> BankTxnLinks {
        BankTxnLinks {
            transaction_id: id.into(),
            source_payment_id: Some(payment_id.into()),
            source_invoice_id: None,
            journal_entry_id: journal_entry.map(String::from),
            gl_cash_account: Some("100000".into()),
            is_suspicious,
            is_outbound: true,
            amount,
            parent_transaction_id: None,
        }
    }

    #[test]
    fn test_clean_coherence_passes() {
        let payments = vec![
            payment("PAY-1", 1000.0, false, Some("JE-1")),
            payment("PAY-2", 500.0, true, Some("JE-2")),
        ];
        let bank_txns = vec![
            bridged_txn("BT-1", "PAY-1", 1000.0, false, Some("JE-1")),
            // Fraud on PAY-2 is propagated as a suspicious transaction.
            bridged_txn("BT-2", "PAY-2", 500.0, true, Some("JE-2")),
        ];

        let result = CrossLayerCoherenceAnalyzer::new()
            .analyze(&payments, &bank_txns)
            .unwrap();
        assert!(result.passes, "Issues: {:?}", result.issues);
        assert_eq!(result.bridged_transactions, 2);
        assert_eq!(result.dangling_payment_refs, 0);
        assert!((result.fraud_propagation_rate - 1.0).abs() < 1e-9);
    }

    #[test]
    fn test_dangling_payment_ref_detected() {
        let payments = vec![payment("PAY-1", 1000.0, false, None)];
        // PAY-999 is not among the known payments.
        let bank_txns = vec![bridged_txn("BT-1", "PAY-999", 1000.0, false, None)];

        let result = CrossLayerCoherenceAnalyzer::new()
            .analyze(&payments, &bank_txns)
            .unwrap();
        assert!(!result.passes);
        assert_eq!(result.dangling_payment_refs, 1);
    }

    #[test]
    fn test_fraud_propagation_failure_detected() {
        // The payment is fraudulent, yet its bank transaction is not flagged suspicious.
        let payments = vec![payment("PAY-1", 1000.0, true, None)];
        let bank_txns = vec![bridged_txn("BT-1", "PAY-1", 1000.0, false, None)];

        let result = CrossLayerCoherenceAnalyzer::new()
            .analyze(&payments, &bank_txns)
            .unwrap();
        assert!(!result.passes);
        assert!((result.fraud_propagation_rate - 0.0).abs() < 1e-9);
        assert_eq!(result.unpropagated_fraud_payments, 1);
    }
}