datasynth_eval/benchmarks/
industry.rs

//! Industry-specific evaluation benchmarks.
//!
//! Provides evaluation benchmarks for industry-specific fraud patterns:
//! - Manufacturing: Yield manipulation, labor fraud, inventory schemes
//! - Retail: Sweethearting, skimming, refund fraud
//! - Healthcare: Upcoding, unbundling, kickbacks
//! - Technology: Revenue manipulation, capitalization fraud
//! - Financial Services: Loan fraud, trading manipulation
9
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13use super::{
14    BaselineModelType, BaselineResult, BenchmarkBuilder, BenchmarkSuite, BenchmarkTaskType,
15    MetricType,
16};
17
18/// Industry-specific benchmark analysis.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct IndustryBenchmarkAnalysis {
21    /// Industry sector.
22    pub industry: String,
23    /// Industry-specific anomaly count.
24    pub industry_anomaly_count: usize,
25    /// Industry-specific anomaly rate.
26    pub industry_anomaly_rate: f64,
27    /// Transaction type distribution.
28    pub transaction_type_distribution: HashMap<String, usize>,
29    /// Terminology coverage score (0.0-1.0).
30    pub terminology_coverage: f64,
31    /// Regulatory framework alignment score.
32    pub regulatory_alignment: f64,
33    /// Issues found.
34    pub issues: Vec<String>,
35}
36
37impl Default for IndustryBenchmarkAnalysis {
38    fn default() -> Self {
39        Self {
40            industry: String::new(),
41            industry_anomaly_count: 0,
42            industry_anomaly_rate: 0.0,
43            transaction_type_distribution: HashMap::new(),
44            terminology_coverage: 1.0,
45            regulatory_alignment: 1.0,
46            issues: Vec::new(),
47        }
48    }
49}
50
// =============================================================================
// Manufacturing Benchmarks
// =============================================================================
54
55/// Manufacturing-Fraud-5K: Manufacturing-specific fraud detection.
56pub fn manufacturing_fraud_5k() -> BenchmarkSuite {
57    let mut class_dist = HashMap::new();
58    class_dist.insert("normal".to_string(), 4500);
59    class_dist.insert("yield_manipulation".to_string(), 150);
60    class_dist.insert("labor_misallocation".to_string(), 120);
61    class_dist.insert("phantom_production".to_string(), 50);
62    class_dist.insert("standard_cost_manipulation".to_string(), 80);
63    class_dist.insert("inventory_fraud".to_string(), 60);
64    class_dist.insert("scrap_fraud".to_string(), 40);
65
66    BenchmarkBuilder::new("manufacturing-fraud-5k", "Manufacturing-Fraud-5K")
67        .description("5K manufacturing transactions with industry-specific fraud patterns. Tests detection of yield manipulation, labor fraud, and inventory schemes.")
68        .task_type(BenchmarkTaskType::FraudClassification)
69        .dataset_size(5000, 500)
70        .class_distribution(class_dist)
71        .split_ratios(0.7, 0.15, 0.15, true)
72        .primary_metric(MetricType::MacroF1)
73        .metrics(vec![
74            MetricType::AucRoc,
75            MetricType::AucPr,
76            MetricType::MacroF1,
77            MetricType::Recall,
78            MetricType::Precision,
79        ])
80        .seed(11111)
81        .time_span_days(365)
82        .num_companies(2)
83        .add_baseline(BaselineResult {
84            model_name: "RuleBased-MFG".to_string(),
85            model_type: BaselineModelType::RuleBased,
86            metrics: [
87                ("auc_roc".to_string(), 0.68),
88                ("macro_f1".to_string(), 0.35),
89            ].into_iter().collect(),
90            training_time_seconds: Some(0.0),
91            inference_time_ms: Some(0.5),
92            notes: Some("Manufacturing variance analysis rules".to_string()),
93        })
94        .add_baseline(BaselineResult {
95            model_name: "XGBoost-MFG".to_string(),
96            model_type: BaselineModelType::XgBoost,
97            metrics: [
98                ("auc_roc".to_string(), 0.85),
99                ("macro_f1".to_string(), 0.58),
100            ].into_iter().collect(),
101            training_time_seconds: Some(4.0),
102            inference_time_ms: Some(0.05),
103            notes: Some("With BOM/routing features".to_string()),
104        })
105        .metadata("industry", "manufacturing")
106        .metadata("transaction_types", "production_order,material_issue,labor_posting,variance")
107        .metadata("difficulty", "medium")
108        .build()
109}
110
// =============================================================================
// Retail Benchmarks
// =============================================================================
114
115/// Retail-Fraud-10K: Retail-specific fraud detection.
116pub fn retail_fraud_10k() -> BenchmarkSuite {
117    let mut class_dist = HashMap::new();
118    class_dist.insert("normal".to_string(), 9000);
119    class_dist.insert("sweethearting".to_string(), 300);
120    class_dist.insert("skimming".to_string(), 150);
121    class_dist.insert("refund_fraud".to_string(), 250);
122    class_dist.insert("void_abuse".to_string(), 120);
123    class_dist.insert("gift_card_fraud".to_string(), 80);
124    class_dist.insert("employee_discount_abuse".to_string(), 60);
125    class_dist.insert("vendor_kickback".to_string(), 40);
126
127    BenchmarkBuilder::new("retail-fraud-10k", "Retail-Fraud-10K")
128        .description("10K retail POS transactions with industry-specific fraud patterns. Tests detection of sweethearting, skimming, and refund schemes.")
129        .task_type(BenchmarkTaskType::FraudClassification)
130        .dataset_size(10000, 1000)
131        .class_distribution(class_dist)
132        .split_ratios(0.7, 0.15, 0.15, true)
133        .primary_metric(MetricType::AucPr)
134        .metrics(vec![
135            MetricType::AucPr,
136            MetricType::AucRoc,
137            MetricType::MacroF1,
138            MetricType::PrecisionAtK(100),
139            MetricType::Recall,
140        ])
141        .seed(22222)
142        .time_span_days(90)
143        .num_companies(1)
144        .add_baseline(BaselineResult {
145            model_name: "RuleBased-Retail".to_string(),
146            model_type: BaselineModelType::RuleBased,
147            metrics: [
148                ("auc_pr".to_string(), 0.42),
149                ("auc_roc".to_string(), 0.72),
150            ].into_iter().collect(),
151            training_time_seconds: Some(0.0),
152            inference_time_ms: Some(0.2),
153            notes: Some("POS exception analysis rules".to_string()),
154        })
155        .add_baseline(BaselineResult {
156            model_name: "RandomForest-Retail".to_string(),
157            model_type: BaselineModelType::RandomForest,
158            metrics: [
159                ("auc_pr".to_string(), 0.58),
160                ("auc_roc".to_string(), 0.84),
161            ].into_iter().collect(),
162            training_time_seconds: Some(3.0),
163            inference_time_ms: Some(0.1),
164            notes: Some("With cashier behavior features".to_string()),
165        })
166        .add_baseline(BaselineResult {
167            model_name: "LightGBM-Retail".to_string(),
168            model_type: BaselineModelType::LightGbm,
169            metrics: [
170                ("auc_pr".to_string(), 0.68),
171                ("auc_roc".to_string(), 0.90),
172            ].into_iter().collect(),
173            training_time_seconds: Some(2.0),
174            inference_time_ms: Some(0.05),
175            notes: Some("Optimized with temporal features".to_string()),
176        })
177        .metadata("industry", "retail")
178        .metadata("transaction_types", "pos_sale,return,void,discount,gift_card")
179        .metadata("difficulty", "medium")
180        .build()
181}
182
// =============================================================================
// Healthcare Benchmarks
// =============================================================================
186
187/// Healthcare-Fraud-5K: Healthcare revenue cycle fraud detection.
188pub fn healthcare_fraud_5k() -> BenchmarkSuite {
189    let mut class_dist = HashMap::new();
190    class_dist.insert("normal".to_string(), 4500);
191    class_dist.insert("upcoding".to_string(), 150);
192    class_dist.insert("unbundling".to_string(), 100);
193    class_dist.insert("phantom_billing".to_string(), 50);
194    class_dist.insert("duplicate_billing".to_string(), 80);
195    class_dist.insert("kickback".to_string(), 40);
196    class_dist.insert("medical_necessity_abuse".to_string(), 60);
197    class_dist.insert("dme_fraud".to_string(), 20);
198
199    BenchmarkBuilder::new("healthcare-fraud-5k", "Healthcare-Fraud-5K")
200        .description("5K healthcare revenue cycle transactions with industry-specific fraud patterns. Tests detection of upcoding, unbundling, and kickbacks under HIPAA/Stark/FCA compliance.")
201        .task_type(BenchmarkTaskType::FraudClassification)
202        .dataset_size(5000, 500)
203        .class_distribution(class_dist)
204        .split_ratios(0.7, 0.15, 0.15, true)
205        .primary_metric(MetricType::AucPr)
206        .metrics(vec![
207            MetricType::AucPr,
208            MetricType::AucRoc,
209            MetricType::MacroF1,
210            MetricType::Recall,
211            MetricType::PrecisionAtK(50),
212        ])
213        .seed(33333)
214        .time_span_days(365)
215        .num_companies(1)
216        .add_baseline(BaselineResult {
217            model_name: "NCCI-Edits".to_string(),
218            model_type: BaselineModelType::RuleBased,
219            metrics: [
220                ("auc_pr".to_string(), 0.35),
221                ("auc_roc".to_string(), 0.65),
222            ].into_iter().collect(),
223            training_time_seconds: Some(0.0),
224            inference_time_ms: Some(1.0),
225            notes: Some("CMS NCCI edit-based detection".to_string()),
226        })
227        .add_baseline(BaselineResult {
228            model_name: "ClaimAnalytics".to_string(),
229            model_type: BaselineModelType::RandomForest,
230            metrics: [
231                ("auc_pr".to_string(), 0.52),
232                ("auc_roc".to_string(), 0.80),
233            ].into_iter().collect(),
234            training_time_seconds: Some(5.0),
235            inference_time_ms: Some(0.2),
236            notes: Some("With ICD-10/CPT coding features".to_string()),
237        })
238        .add_baseline(BaselineResult {
239            model_name: "DeepClaim".to_string(),
240            model_type: BaselineModelType::NeuralNetwork,
241            metrics: [
242                ("auc_pr".to_string(), 0.65),
243                ("auc_roc".to_string(), 0.88),
244            ].into_iter().collect(),
245            training_time_seconds: Some(30.0),
246            inference_time_ms: Some(2.0),
247            notes: Some("Embedding-based claim analysis".to_string()),
248        })
249        .metadata("industry", "healthcare")
250        .metadata("regulatory_framework", "hipaa,stark,anti_kickback,fca")
251        .metadata("coding_systems", "icd10,cpt,drg,hcpcs")
252        .metadata("difficulty", "hard")
253        .build()
254}
255
// =============================================================================
// Technology Benchmarks
// =============================================================================
259
260/// Technology-Fraud-3K: Technology revenue recognition fraud.
261pub fn technology_fraud_3k() -> BenchmarkSuite {
262    let mut class_dist = HashMap::new();
263    class_dist.insert("normal".to_string(), 2700);
264    class_dist.insert("premature_revenue".to_string(), 100);
265    class_dist.insert("side_letter_abuse".to_string(), 60);
266    class_dist.insert("channel_stuffing".to_string(), 50);
267    class_dist.insert("improper_capitalization".to_string(), 50);
268    class_dist.insert("useful_life_manipulation".to_string(), 40);
269
270    BenchmarkBuilder::new("technology-fraud-3k", "Technology-Fraud-3K")
271        .description("3K technology sector transactions with SaaS/license revenue and R&D fraud patterns. Tests detection of ASC 606 violations and improper capitalization.")
272        .task_type(BenchmarkTaskType::FraudClassification)
273        .dataset_size(3000, 300)
274        .class_distribution(class_dist)
275        .split_ratios(0.7, 0.15, 0.15, true)
276        .primary_metric(MetricType::AucPr)
277        .metrics(vec![
278            MetricType::AucPr,
279            MetricType::AucRoc,
280            MetricType::MacroF1,
281            MetricType::Recall,
282        ])
283        .seed(44444)
284        .time_span_days(730) // 2 years for revenue recognition
285        .num_companies(2)
286        .add_baseline(BaselineResult {
287            model_name: "RevenueRules".to_string(),
288            model_type: BaselineModelType::RuleBased,
289            metrics: [
290                ("auc_pr".to_string(), 0.30),
291                ("auc_roc".to_string(), 0.62),
292            ].into_iter().collect(),
293            training_time_seconds: Some(0.0),
294            inference_time_ms: Some(0.5),
295            notes: Some("ASC 606 compliance rules".to_string()),
296        })
297        .add_baseline(BaselineResult {
298            model_name: "ContractML".to_string(),
299            model_type: BaselineModelType::XgBoost,
300            metrics: [
301                ("auc_pr".to_string(), 0.48),
302                ("auc_roc".to_string(), 0.78),
303            ].into_iter().collect(),
304            training_time_seconds: Some(3.0),
305            inference_time_ms: Some(0.1),
306            notes: Some("With contract/performance obligation features".to_string()),
307        })
308        .add_baseline(BaselineResult {
309            model_name: "TemporalLGBM".to_string(),
310            model_type: BaselineModelType::LightGbm,
311            metrics: [
312                ("auc_pr".to_string(), 0.58),
313                ("auc_roc".to_string(), 0.85),
314            ].into_iter().collect(),
315            training_time_seconds: Some(4.0),
316            inference_time_ms: Some(0.08),
317            notes: Some("With temporal revenue patterns".to_string()),
318        })
319        .metadata("industry", "technology")
320        .metadata("revenue_standards", "asc_606,asc_985")
321        .metadata("difficulty", "hard")
322        .build()
323}
324
// =============================================================================
// Financial Services Benchmarks
// =============================================================================
328
329/// FinancialServices-Fraud-5K: Financial services fraud detection.
330pub fn financial_services_fraud_5k() -> BenchmarkSuite {
331    let mut class_dist = HashMap::new();
332    class_dist.insert("normal".to_string(), 4500);
333    class_dist.insert("loan_fraud".to_string(), 150);
334    class_dist.insert("trading_fraud".to_string(), 100);
335    class_dist.insert("account_manipulation".to_string(), 80);
336    class_dist.insert("insurance_fraud".to_string(), 100);
337    class_dist.insert("fee_fraud".to_string(), 70);
338
339    BenchmarkBuilder::new("financial-services-fraud-5k", "FinancialServices-Fraud-5K")
340        .description("5K financial services transactions with banking, insurance, and investment fraud patterns. Tests detection under regulatory frameworks (Basel, Solvency, SEC).")
341        .task_type(BenchmarkTaskType::FraudClassification)
342        .dataset_size(5000, 500)
343        .class_distribution(class_dist)
344        .split_ratios(0.7, 0.15, 0.15, true)
345        .primary_metric(MetricType::AucPr)
346        .metrics(vec![
347            MetricType::AucPr,
348            MetricType::AucRoc,
349            MetricType::MacroF1,
350            MetricType::Recall,
351            MetricType::PrecisionAtK(50),
352        ])
353        .seed(55555)
354        .time_span_days(365)
355        .num_companies(1)
356        .add_baseline(BaselineResult {
357            model_name: "ComplianceRules".to_string(),
358            model_type: BaselineModelType::RuleBased,
359            metrics: [
360                ("auc_pr".to_string(), 0.38),
361                ("auc_roc".to_string(), 0.70),
362            ].into_iter().collect(),
363            training_time_seconds: Some(0.0),
364            inference_time_ms: Some(0.5),
365            notes: Some("Regulatory compliance rules".to_string()),
366        })
367        .add_baseline(BaselineResult {
368            model_name: "FraudNet".to_string(),
369            model_type: BaselineModelType::RandomForest,
370            metrics: [
371                ("auc_pr".to_string(), 0.55),
372                ("auc_roc".to_string(), 0.83),
373            ].into_iter().collect(),
374            training_time_seconds: Some(5.0),
375            inference_time_ms: Some(0.15),
376            notes: Some("With account behavior features".to_string()),
377        })
378        .add_baseline(BaselineResult {
379            model_name: "DeepFraud".to_string(),
380            model_type: BaselineModelType::NeuralNetwork,
381            metrics: [
382                ("auc_pr".to_string(), 0.68),
383                ("auc_roc".to_string(), 0.91),
384            ].into_iter().collect(),
385            training_time_seconds: Some(45.0),
386            inference_time_ms: Some(3.0),
387            notes: Some("LSTM-based sequence model".to_string()),
388        })
389        .metadata("industry", "financial_services")
390        .metadata("regulatory_framework", "basel,sec,finra")
391        .metadata("difficulty", "hard")
392        .build()
393}
394
395/// Get all industry-specific benchmarks.
396pub fn all_industry_benchmarks() -> Vec<BenchmarkSuite> {
397    vec![
398        manufacturing_fraud_5k(),
399        retail_fraud_10k(),
400        healthcare_fraud_5k(),
401        technology_fraud_3k(),
402        financial_services_fraud_5k(),
403    ]
404}
405
406/// Get benchmarks for a specific industry.
407pub fn get_industry_benchmark(industry: &str) -> Option<BenchmarkSuite> {
408    match industry.to_lowercase().as_str() {
409        "manufacturing" => Some(manufacturing_fraud_5k()),
410        "retail" => Some(retail_fraud_10k()),
411        "healthcare" => Some(healthcare_fraud_5k()),
412        "technology" => Some(technology_fraud_3k()),
413        "financial_services" | "financialservices" => Some(financial_services_fraud_5k()),
414        _ => None,
415    }
416}
417
#[cfg(test)]
mod tests {
    use super::*;

    /// The manufacturing suite exposes its id, size, a fraud label and industry metadata.
    #[test]
    fn test_manufacturing_fraud_5k() {
        let suite = manufacturing_fraud_5k();
        let labels = &suite.dataset.class_distribution;
        let expected_industry = "manufacturing".to_string();

        assert_eq!(suite.id, "manufacturing-fraud-5k");
        assert_eq!(suite.dataset.total_records, 5000);
        assert!(labels.contains_key("yield_manipulation"));
        assert_eq!(suite.metadata.get("industry"), Some(&expected_industry));
    }

    /// The retail suite exposes its id, size and two retail fraud labels.
    #[test]
    fn test_retail_fraud_10k() {
        let suite = retail_fraud_10k();
        let labels = &suite.dataset.class_distribution;

        assert_eq!(suite.id, "retail-fraud-10k");
        assert_eq!(suite.dataset.total_records, 10000);
        assert!(labels.contains_key("sweethearting"));
        assert!(labels.contains_key("skimming"));
    }

    /// The healthcare suite exposes its id, two fraud labels and regulatory metadata.
    #[test]
    fn test_healthcare_fraud_5k() {
        let suite = healthcare_fraud_5k();
        let labels = &suite.dataset.class_distribution;

        assert_eq!(suite.id, "healthcare-fraud-5k");
        assert!(labels.contains_key("upcoding"));
        assert!(labels.contains_key("unbundling"));
        assert!(suite.metadata.contains_key("regulatory_framework"));
    }

    /// The technology suite exposes its id and two revenue fraud labels.
    #[test]
    fn test_technology_fraud_3k() {
        let suite = technology_fraud_3k();
        let labels = &suite.dataset.class_distribution;

        assert_eq!(suite.id, "technology-fraud-3k");
        assert!(labels.contains_key("premature_revenue"));
        assert!(labels.contains_key("channel_stuffing"));
    }

    /// The financial services suite exposes its id and two fraud labels.
    #[test]
    fn test_financial_services_fraud_5k() {
        let suite = financial_services_fraud_5k();
        let labels = &suite.dataset.class_distribution;

        assert_eq!(suite.id, "financial-services-fraud-5k");
        assert!(labels.contains_key("loan_fraud"));
        assert!(labels.contains_key("trading_fraud"));
    }

    /// All five suites are returned and each carries `industry` metadata.
    #[test]
    fn test_all_industry_benchmarks() {
        let suites = all_industry_benchmarks();
        assert_eq!(suites.len(), 5);

        // All should have industry metadata
        assert!(suites
            .iter()
            .all(|suite| suite.metadata.contains_key("industry")));
    }

    /// Known industry names resolve to a suite; an unknown name does not.
    #[test]
    fn test_get_industry_benchmark() {
        let known_industries = [
            "manufacturing",
            "retail",
            "healthcare",
            "technology",
            "financial_services",
        ];
        for name in known_industries.iter() {
            assert!(get_industry_benchmark(name).is_some());
        }

        assert!(get_industry_benchmark("unknown").is_none());
    }

    /// The default analysis is empty and starts with full terminology coverage.
    #[test]
    fn test_industry_benchmark_analysis_default() {
        let defaults = IndustryBenchmarkAnalysis::default();

        assert!(defaults.industry.is_empty());
        assert_eq!(defaults.terminology_coverage, 1.0);
        assert!(defaults.issues.is_empty());
    }
}
datasynth_eval/benchmarks/industry.rs

datasynth_eval/benchmarks/
industry.rs