Skip to main content

datasynth_generators/sourcing/
spend_analysis_generator.rs

1//! Spend analysis generator.
2//!
3//! Analyzes vendor spend to identify sourcing opportunities.
4
5use datasynth_config::schema::SpendAnalysisConfig;
6use datasynth_core::models::sourcing::{SpendAnalysis, VendorSpendShare};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use rust_decimal::Decimal;
11
12/// Generates spend analysis records from vendor pool and transaction data.
13pub struct SpendAnalysisGenerator {
14    rng: ChaCha8Rng,
15    config: SpendAnalysisConfig,
16}
17
18impl SpendAnalysisGenerator {
19    /// Create a new spend analysis generator.
20    pub fn new(seed: u64) -> Self {
21        Self {
22            rng: seeded_rng(seed, 0),
23            config: SpendAnalysisConfig::default(),
24        }
25    }
26
27    /// Create with custom configuration.
28    pub fn with_config(seed: u64, config: SpendAnalysisConfig) -> Self {
29        Self {
30            rng: seeded_rng(seed, 0),
31            config,
32        }
33    }
34
35    /// Generate spend analysis for a set of vendor-category pairs.
36    ///
37    /// # Arguments
38    /// * `company_code` - Company code
39    /// * `vendor_ids` - Available vendor IDs
40    /// * `categories` - Spend categories (category_id, category_name)
41    /// * `fiscal_year` - Analysis period
42    pub fn generate(
43        &mut self,
44        company_code: &str,
45        vendor_ids: &[String],
46        categories: &[(String, String)],
47        fiscal_year: u16,
48    ) -> Vec<SpendAnalysis> {
49        let mut analyses = Vec::new();
50
51        for (cat_id, cat_name) in categories {
52            // Assign random vendors to this category
53            let vendor_count = self.rng.random_range(3..=vendor_ids.len().min(15));
54            let mut cat_vendors: Vec<&String> =
55                vendor_ids.sample(&mut self.rng, vendor_count).collect();
56            cat_vendors.shuffle(&mut self.rng);
57
58            // Generate spend shares using Pareto-like distribution
59            let mut raw_shares: Vec<f64> = (0..cat_vendors.len())
60                .map(|i| 1.0 / ((i as f64 + 1.0).powf(0.8)))
61                .collect();
62            let total: f64 = raw_shares.iter().sum();
63            for s in &mut raw_shares {
64                *s /= total;
65            }
66
67            let total_spend = Decimal::from(self.rng.random_range(100_000i64..=5_000_000));
68            let transaction_count = self.rng.random_range(50..=2000);
69
70            // Calculate HHI
71            let hhi: f64 = raw_shares.iter().map(|s| (s * 100.0).powi(2)).sum();
72
73            let contract_coverage = self.rng.random_range(0.3..=0.95);
74            let preferred_coverage = contract_coverage * self.rng.random_range(0.7..=1.0);
75
76            let vendor_shares: Vec<VendorSpendShare> = cat_vendors
77                .iter()
78                .zip(raw_shares.iter())
79                .map(|(vid, share)| VendorSpendShare {
80                    vendor_id: vid.to_string(),
81                    vendor_name: format!("Vendor {vid}"),
82                    spend_amount: Decimal::from_f64_retain(
83                        total_spend.to_string().parse::<f64>().unwrap_or(0.0) * share,
84                    )
85                    .unwrap_or(Decimal::ZERO),
86                    share: *share,
87                    is_preferred: *share > 0.15 && self.rng.random_bool(preferred_coverage),
88                })
89                .collect();
90
91            analyses.push(SpendAnalysis {
92                category_id: cat_id.clone(),
93                category_name: cat_name.clone(),
94                company_code: company_code.to_string(),
95                total_spend,
96                vendor_count: cat_vendors.len() as u32,
97                transaction_count,
98                hhi_index: hhi,
99                vendor_shares,
100                contract_coverage,
101                preferred_vendor_coverage: preferred_coverage,
102                price_trend_pct: self.rng.random_range(-0.05..=0.10),
103                fiscal_year,
104            });
105        }
106
107        analyses
108    }
109
110    /// Get the HHI threshold from config.
111    pub fn hhi_threshold(&self) -> f64 {
112        self.config.hhi_threshold
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Ten vendor IDs, `V0001` through `V0010`.
    fn test_vendor_ids() -> Vec<String> {
        (1..=10).map(|i| format!("V{i:04}")).collect()
    }

    /// Two sample (category_id, category_name) pairs.
    fn test_categories() -> Vec<(String, String)> {
        vec![
            ("CAT-001".to_string(), "Office Supplies".to_string()),
            ("CAT-002".to_string(), "IT Equipment".to_string()),
        ]
    }

    /// One analysis is produced per category and every field is populated.
    #[test]
    fn test_basic_generation() {
        // `gen` is a reserved keyword in the 2024 edition, so avoid it as a binding name.
        let mut generator = SpendAnalysisGenerator::new(42);
        let results = generator.generate("C001", &test_vendor_ids(), &test_categories(), 2024);

        assert_eq!(results.len(), 2);
        for analysis in &results {
            assert_eq!(analysis.company_code, "C001");
            assert_eq!(analysis.fiscal_year, 2024);
            assert!(!analysis.category_id.is_empty());
            assert!(!analysis.category_name.is_empty());
            assert!(analysis.vendor_count > 0);
            assert!(analysis.transaction_count > 0);
            assert!(analysis.total_spend > Decimal::ZERO);
            assert!(analysis.hhi_index > 0.0);
            assert!(!analysis.vendor_shares.is_empty());
            assert_eq!(analysis.vendor_count as usize, analysis.vendor_shares.len());
        }
    }

    /// Two generators built from the same seed produce identical output,
    /// down to the individual vendor shares.
    #[test]
    fn test_deterministic() {
        let mut first = SpendAnalysisGenerator::new(42);
        let mut second = SpendAnalysisGenerator::new(42);
        let vendors = test_vendor_ids();
        let cats = test_categories();

        let r1 = first.generate("C001", &vendors, &cats, 2024);
        let r2 = second.generate("C001", &vendors, &cats, 2024);

        assert_eq!(r1.len(), r2.len());
        for (a, b) in r1.iter().zip(r2.iter()) {
            assert_eq!(a.category_id, b.category_id);
            assert_eq!(a.total_spend, b.total_spend);
            assert_eq!(a.vendor_count, b.vendor_count);
            assert_eq!(a.transaction_count, b.transaction_count);
            assert_eq!(a.hhi_index, b.hhi_index);

            // Vendor-level output must match too, otherwise a change in RNG draw
            // order could slip through unnoticed.
            assert_eq!(a.vendor_shares.len(), b.vendor_shares.len());
            for (x, y) in a.vendor_shares.iter().zip(b.vendor_shares.iter()) {
                assert_eq!(x.vendor_id, y.vendor_id);
                assert_eq!(x.spend_amount, y.spend_amount);
                assert_eq!(x.share, y.share);
                assert_eq!(x.is_preferred, y.is_preferred);
            }
        }
    }

    /// Generated values stay inside their documented ranges.
    #[test]
    fn test_field_constraints() {
        let mut generator = SpendAnalysisGenerator::new(99);
        let results = generator.generate("C001", &test_vendor_ids(), &test_categories(), 2024);

        for analysis in &results {
            // Shares should sum to approximately 1.0
            let share_sum: f64 = analysis.vendor_shares.iter().map(|s| s.share).sum();
            assert!(
                (share_sum - 1.0).abs() < 0.01,
                "shares should sum to ~1.0, got {}",
                share_sum
            );

            // Contract coverage and price trend should be in valid range
            assert!(analysis.contract_coverage >= 0.0 && analysis.contract_coverage <= 1.0);
            assert!(
                analysis.preferred_vendor_coverage >= 0.0
                    && analysis.preferred_vendor_coverage <= 1.0
            );
            assert!(analysis.price_trend_pct >= -0.05 && analysis.price_trend_pct <= 0.10);

            // Each vendor share should have a non-empty vendor_id
            for vs in &analysis.vendor_shares {
                assert!(!vs.vendor_id.is_empty());
            }
        }
    }

    /// The default configuration exposes an HHI threshold of 2500.
    #[test]
    fn test_hhi_threshold() {
        let generator = SpendAnalysisGenerator::new(42);
        assert_eq!(generator.hhi_threshold(), 2500.0);
    }
}
203}