// Source: datasynth_core/distributions/industry_profiles.rs

//! Industry-specific amount distribution profiles.
//!
//! Pre-configured distribution profiles for different industries, based on
//! the transaction patterns typically observed in each sector.
5
6use super::mixture::{LogNormalComponent, LogNormalMixtureConfig};
7use super::pareto::ParetoConfig;
8use super::weibull::WeibullConfig;
9use serde::{Deserialize, Serialize};
10
11/// Industry type for profile selection.
12#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
13#[serde(rename_all = "snake_case")]
14pub enum IndustryType {
15    /// Retail industry (B2C, POS transactions)
16    Retail,
17    /// Manufacturing industry (B2B, production)
18    #[default]
19    Manufacturing,
20    /// Financial services (banking, insurance)
21    FinancialServices,
22    /// Healthcare
23    Healthcare,
24    /// Technology / SaaS
25    Technology,
26    /// Wholesale / Distribution
27    Wholesale,
28    /// Professional Services
29    ProfessionalServices,
30    /// Construction
31    Construction,
32}
33
34/// Complete industry amount profile.
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct IndustryAmountProfile {
37    /// Industry type
38    pub industry: IndustryType,
39    /// Sales/revenue transaction amounts
40    pub sales_amounts: LogNormalMixtureConfig,
41    /// Purchase transaction amounts
42    pub purchase_amounts: LogNormalMixtureConfig,
43    /// Payroll transaction amounts
44    pub payroll_amounts: LogNormalMixtureConfig,
45    /// Capital expenditure amounts (heavy-tailed)
46    pub capex_amounts: ParetoConfig,
47    /// Days-to-payment distribution
48    pub days_to_payment: WeibullConfig,
49    /// Seasonality multipliers by month (Jan=0, Dec=11)
50    pub seasonality: [f64; 12],
51    /// Typical line item count range
52    pub line_item_range: (u8, u8),
53    /// Average transaction volume per day
54    pub avg_daily_transactions: u32,
55}
56
57impl Default for IndustryAmountProfile {
58    fn default() -> Self {
59        Self::manufacturing()
60    }
61}
62
63impl IndustryAmountProfile {
64    /// Create a retail industry profile.
65    ///
66    /// Characteristics:
67    /// - High volume of small POS transactions
68    /// - Mixture of cash registers, online, and returns
69    /// - Strong seasonality (Q4 spike)
70    /// - Fast payment terms
71    pub fn retail() -> Self {
72        Self {
73            industry: IndustryType::Retail,
74            sales_amounts: LogNormalMixtureConfig {
75                components: vec![
76                    // POS transactions (60%) - small purchases
77                    LogNormalComponent::with_label(0.60, 3.5, 1.0, "pos_small"),
78                    // Medium purchases (25%) - grocery, apparel
79                    LogNormalComponent::with_label(0.25, 4.5, 0.8, "medium"),
80                    // Large purchases (10%) - electronics, furniture
81                    LogNormalComponent::with_label(0.10, 6.0, 1.2, "large"),
82                    // High-value (5%) - luxury items
83                    LogNormalComponent::with_label(0.05, 7.5, 0.9, "luxury"),
84                ],
85                min_value: 0.01,
86                max_value: Some(50_000.0),
87                decimal_places: 2,
88            },
89            purchase_amounts: LogNormalMixtureConfig {
90                components: vec![
91                    // Regular inventory orders (70%)
92                    LogNormalComponent::with_label(0.70, 7.0, 1.5, "inventory"),
93                    // Large bulk orders (25%)
94                    LogNormalComponent::with_label(0.25, 9.0, 1.0, "bulk"),
95                    // Special/seasonal (5%)
96                    LogNormalComponent::with_label(0.05, 10.0, 0.8, "seasonal"),
97                ],
98                min_value: 100.0,
99                max_value: Some(1_000_000.0),
100                decimal_places: 2,
101            },
102            payroll_amounts: LogNormalMixtureConfig {
103                components: vec![
104                    // Hourly/part-time (60%)
105                    LogNormalComponent::with_label(0.60, 6.5, 0.6, "hourly"),
106                    // Full-time staff (35%)
107                    LogNormalComponent::with_label(0.35, 7.5, 0.5, "salary"),
108                    // Management (5%)
109                    LogNormalComponent::with_label(0.05, 8.5, 0.4, "management"),
110                ],
111                min_value: 200.0,
112                max_value: Some(50_000.0),
113                decimal_places: 2,
114            },
115            capex_amounts: ParetoConfig {
116                alpha: 2.0,
117                x_min: 5_000.0,
118                max_value: Some(500_000.0),
119                decimal_places: 2,
120            },
121            days_to_payment: WeibullConfig::days_to_payment(),
122            seasonality: [
123                0.75, // Jan - post-holiday lull
124                0.70, // Feb
125                0.85, // Mar
126                0.90, // Apr
127                0.95, // May
128                0.90, // Jun
129                0.85, // Jul
130                0.90, // Aug - back to school
131                0.95, // Sep
132                1.10, // Oct
133                1.40, // Nov - Black Friday
134                1.75, // Dec - Holiday peak
135            ],
136            line_item_range: (1, 50),
137            avg_daily_transactions: 500,
138        }
139    }
140
141    /// Create a manufacturing industry profile.
142    ///
143    /// Characteristics:
144    /// - Mix of raw materials, components, and finished goods
145    /// - Larger average transaction sizes
146    /// - B2B payment terms (Net 30-60)
147    /// - Production-driven seasonality
148    pub fn manufacturing() -> Self {
149        Self {
150            industry: IndustryType::Manufacturing,
151            sales_amounts: LogNormalMixtureConfig {
152                components: vec![
153                    // Standard product orders (50%)
154                    LogNormalComponent::with_label(0.50, 8.0, 1.5, "standard"),
155                    // Large orders (35%)
156                    LogNormalComponent::with_label(0.35, 10.0, 1.0, "large"),
157                    // Enterprise/contract orders (15%)
158                    LogNormalComponent::with_label(0.15, 12.0, 0.8, "enterprise"),
159                ],
160                min_value: 500.0,
161                max_value: Some(10_000_000.0),
162                decimal_places: 2,
163            },
164            purchase_amounts: LogNormalMixtureConfig {
165                components: vec![
166                    // Raw materials (55%)
167                    LogNormalComponent::with_label(0.55, 8.5, 1.5, "raw_materials"),
168                    // Components/parts (30%)
169                    LogNormalComponent::with_label(0.30, 7.5, 1.2, "components"),
170                    // Equipment/tooling (15%)
171                    LogNormalComponent::with_label(0.15, 10.0, 1.0, "equipment"),
172                ],
173                min_value: 100.0,
174                max_value: Some(5_000_000.0),
175                decimal_places: 2,
176            },
177            payroll_amounts: LogNormalMixtureConfig {
178                components: vec![
179                    // Production workers (50%)
180                    LogNormalComponent::with_label(0.50, 7.5, 0.5, "production"),
181                    // Technical staff (30%)
182                    LogNormalComponent::with_label(0.30, 8.0, 0.4, "technical"),
183                    // Management (15%)
184                    LogNormalComponent::with_label(0.15, 9.0, 0.5, "management"),
185                    // Executive (5%)
186                    LogNormalComponent::with_label(0.05, 10.0, 0.4, "executive"),
187                ],
188                min_value: 1000.0,
189                max_value: Some(100_000.0),
190                decimal_places: 2,
191            },
192            capex_amounts: ParetoConfig {
193                alpha: 1.5, // Heavier tail - large equipment purchases
194                x_min: 25_000.0,
195                max_value: Some(10_000_000.0),
196                decimal_places: 2,
197            },
198            days_to_payment: WeibullConfig {
199                shape: 2.0,
200                scale: 45.0, // Net 45 typical
201                min_value: 5.0,
202                max_value: Some(90.0),
203                round_to_integer: true,
204            },
205            seasonality: [
206                0.90, // Jan
207                0.95, // Feb
208                1.00, // Mar
209                1.05, // Apr
210                1.00, // May
211                0.95, // Jun
212                0.85, // Jul - summer slowdown
213                0.90, // Aug
214                1.05, // Sep
215                1.10, // Oct
216                1.05, // Nov
217                0.85, // Dec - holiday shutdown
218            ],
219            line_item_range: (2, 25),
220            avg_daily_transactions: 50,
221        }
222    }
223
224    /// Create a financial services industry profile.
225    ///
226    /// Characteristics:
227    /// - High-value wire transfers and ACH
228    /// - Fee-based income (many small transactions)
229    /// - Regulatory-driven patterns
230    /// - Month-end/quarter-end spikes
231    pub fn financial_services() -> Self {
232        Self {
233            industry: IndustryType::FinancialServices,
234            sales_amounts: LogNormalMixtureConfig {
235                components: vec![
236                    // ACH/small transfers (40%)
237                    LogNormalComponent::with_label(0.40, 6.0, 1.5, "ach_small"),
238                    // Medium transactions (30%)
239                    LogNormalComponent::with_label(0.30, 9.0, 1.5, "medium"),
240                    // Large wire transfers (20%)
241                    LogNormalComponent::with_label(0.20, 12.0, 2.0, "wire_large"),
242                    // Institutional (10%)
243                    LogNormalComponent::with_label(0.10, 15.0, 1.5, "institutional"),
244                ],
245                min_value: 1.0,
246                max_value: Some(100_000_000.0),
247                decimal_places: 2,
248            },
249            purchase_amounts: LogNormalMixtureConfig {
250                components: vec![
251                    // Software/licenses (40%)
252                    LogNormalComponent::with_label(0.40, 7.0, 1.0, "software"),
253                    // Professional services (35%)
254                    LogNormalComponent::with_label(0.35, 9.0, 1.2, "professional"),
255                    // Technology infrastructure (25%)
256                    LogNormalComponent::with_label(0.25, 11.0, 1.0, "infrastructure"),
257                ],
258                min_value: 500.0,
259                max_value: Some(10_000_000.0),
260                decimal_places: 2,
261            },
262            payroll_amounts: LogNormalMixtureConfig {
263                components: vec![
264                    // Operations/support (30%)
265                    LogNormalComponent::with_label(0.30, 8.0, 0.5, "operations"),
266                    // Analysts/associates (35%)
267                    LogNormalComponent::with_label(0.35, 9.0, 0.4, "analyst"),
268                    // Senior professionals (25%)
269                    LogNormalComponent::with_label(0.25, 10.0, 0.4, "senior"),
270                    // Executives (10%)
271                    LogNormalComponent::with_label(0.10, 11.5, 0.5, "executive"),
272                ],
273                min_value: 2000.0,
274                max_value: Some(500_000.0),
275                decimal_places: 2,
276            },
277            capex_amounts: ParetoConfig {
278                alpha: 1.8,
279                x_min: 50_000.0,
280                max_value: Some(50_000_000.0),
281                decimal_places: 2,
282            },
283            days_to_payment: WeibullConfig {
284                shape: 3.0,  // More predictable
285                scale: 10.0, // Fast payment cycles
286                min_value: 1.0,
287                max_value: Some(30.0),
288                round_to_integer: true,
289            },
290            seasonality: [
291                1.05, // Jan - year start
292                0.95, // Feb
293                1.15, // Mar - quarter end
294                1.00, // Apr
295                0.95, // May
296                1.15, // Jun - quarter end
297                0.90, // Jul
298                0.90, // Aug
299                1.15, // Sep - quarter end
300                1.00, // Oct
301                0.95, // Nov
302                1.25, // Dec - year end
303            ],
304            line_item_range: (1, 10),
305            avg_daily_transactions: 1000,
306        }
307    }
308
309    /// Create a healthcare industry profile.
310    ///
311    /// Characteristics:
312    /// - Insurance claims and patient payments
313    /// - Medical supply procurement
314    /// - Regulatory and compliance costs
315    /// - Seasonal illness patterns
316    pub fn healthcare() -> Self {
317        Self {
318            industry: IndustryType::Healthcare,
319            sales_amounts: LogNormalMixtureConfig {
320                components: vec![
321                    // Copays/small claims (40%)
322                    LogNormalComponent::with_label(0.40, 4.0, 1.0, "copay"),
323                    // Standard procedures (35%)
324                    LogNormalComponent::with_label(0.35, 7.0, 1.5, "procedures"),
325                    // Specialist services (20%)
326                    LogNormalComponent::with_label(0.20, 9.0, 1.2, "specialist"),
327                    // Major treatments (5%)
328                    LogNormalComponent::with_label(0.05, 11.0, 1.0, "major"),
329                ],
330                min_value: 10.0,
331                max_value: Some(1_000_000.0),
332                decimal_places: 2,
333            },
334            purchase_amounts: LogNormalMixtureConfig {
335                components: vec![
336                    // Consumable supplies (45%)
337                    LogNormalComponent::with_label(0.45, 6.0, 1.2, "consumables"),
338                    // Pharmaceuticals (35%)
339                    LogNormalComponent::with_label(0.35, 8.0, 1.5, "pharma"),
340                    // Medical equipment (20%)
341                    LogNormalComponent::with_label(0.20, 10.0, 1.0, "equipment"),
342                ],
343                min_value: 50.0,
344                max_value: Some(5_000_000.0),
345                decimal_places: 2,
346            },
347            payroll_amounts: LogNormalMixtureConfig {
348                components: vec![
349                    // Support staff (35%)
350                    LogNormalComponent::with_label(0.35, 7.5, 0.5, "support"),
351                    // Nurses/technicians (35%)
352                    LogNormalComponent::with_label(0.35, 8.5, 0.4, "clinical"),
353                    // Physicians (25%)
354                    LogNormalComponent::with_label(0.25, 10.0, 0.5, "physician"),
355                    // Specialists (5%)
356                    LogNormalComponent::with_label(0.05, 11.0, 0.4, "specialist"),
357                ],
358                min_value: 1500.0,
359                max_value: Some(200_000.0),
360                decimal_places: 2,
361            },
362            capex_amounts: ParetoConfig {
363                alpha: 1.6,
364                x_min: 10_000.0,
365                max_value: Some(20_000_000.0),
366                decimal_places: 2,
367            },
368            days_to_payment: WeibullConfig {
369                shape: 1.5,  // More variance due to insurance
370                scale: 60.0, // Insurance processing time
371                min_value: 10.0,
372                max_value: Some(180.0),
373                round_to_integer: true,
374            },
375            seasonality: [
376                1.15, // Jan - flu season
377                1.10, // Feb
378                1.00, // Mar
379                0.95, // Apr
380                0.90, // May
381                0.90, // Jun
382                0.85, // Jul
383                0.90, // Aug
384                0.95, // Sep
385                1.00, // Oct
386                1.05, // Nov
387                1.10, // Dec - holiday injuries/illness
388            ],
389            line_item_range: (1, 30),
390            avg_daily_transactions: 200,
391        }
392    }
393
394    /// Create a technology/SaaS industry profile.
395    ///
396    /// Characteristics:
397    /// - Subscription-based revenue
398    /// - High R&D and cloud costs
399    /// - Fast growth patterns
400    /// - Minimal seasonality
401    pub fn technology() -> Self {
402        Self {
403            industry: IndustryType::Technology,
404            sales_amounts: LogNormalMixtureConfig {
405                components: vec![
406                    // SMB subscriptions (50%)
407                    LogNormalComponent::with_label(0.50, 5.5, 1.0, "smb"),
408                    // Mid-market (30%)
409                    LogNormalComponent::with_label(0.30, 8.0, 1.0, "midmarket"),
410                    // Enterprise contracts (15%)
411                    LogNormalComponent::with_label(0.15, 10.5, 1.2, "enterprise"),
412                    // Large deals (5%)
413                    LogNormalComponent::with_label(0.05, 13.0, 0.8, "strategic"),
414                ],
415                min_value: 10.0,
416                max_value: Some(10_000_000.0),
417                decimal_places: 2,
418            },
419            purchase_amounts: LogNormalMixtureConfig {
420                components: vec![
421                    // SaaS tools (40%)
422                    LogNormalComponent::with_label(0.40, 6.0, 1.0, "saas"),
423                    // Cloud infrastructure (35%)
424                    LogNormalComponent::with_label(0.35, 8.5, 1.5, "cloud"),
425                    // Hardware/equipment (15%)
426                    LogNormalComponent::with_label(0.15, 7.5, 1.0, "hardware"),
427                    // Contractors (10%)
428                    LogNormalComponent::with_label(0.10, 9.0, 1.0, "contractors"),
429                ],
430                min_value: 50.0,
431                max_value: Some(5_000_000.0),
432                decimal_places: 2,
433            },
434            payroll_amounts: LogNormalMixtureConfig {
435                components: vec![
436                    // Junior engineers (25%)
437                    LogNormalComponent::with_label(0.25, 8.5, 0.4, "junior"),
438                    // Mid-level (40%)
439                    LogNormalComponent::with_label(0.40, 9.2, 0.3, "mid"),
440                    // Senior engineers (25%)
441                    LogNormalComponent::with_label(0.25, 10.0, 0.3, "senior"),
442                    // Leadership (10%)
443                    LogNormalComponent::with_label(0.10, 11.0, 0.4, "leadership"),
444                ],
445                min_value: 3000.0,
446                max_value: Some(300_000.0),
447                decimal_places: 2,
448            },
449            capex_amounts: ParetoConfig {
450                alpha: 2.2, // Less extreme tail
451                x_min: 10_000.0,
452                max_value: Some(2_000_000.0),
453                decimal_places: 2,
454            },
455            days_to_payment: WeibullConfig {
456                shape: 2.5,  // Predictable (often auto-pay)
457                scale: 15.0, // Fast cycles
458                min_value: 0.0,
459                max_value: Some(45.0),
460                round_to_integer: true,
461            },
462            seasonality: [
463                0.95, // Jan
464                0.95, // Feb
465                1.00, // Mar
466                1.00, // Apr
467                1.00, // May
468                1.00, // Jun
469                0.95, // Jul
470                0.95, // Aug
471                1.05, // Sep
472                1.05, // Oct
473                1.00, // Nov
474                1.05, // Dec
475            ],
476            line_item_range: (1, 15),
477            avg_daily_transactions: 100,
478        }
479    }
480
481    /// Get the industry profile for a given industry type.
482    pub fn for_industry(industry: IndustryType) -> Self {
483        match industry {
484            IndustryType::Retail => Self::retail(),
485            IndustryType::Manufacturing => Self::manufacturing(),
486            IndustryType::FinancialServices => Self::financial_services(),
487            IndustryType::Healthcare => Self::healthcare(),
488            IndustryType::Technology => Self::technology(),
489            IndustryType::Wholesale => Self::manufacturing(), // Similar to manufacturing
490            IndustryType::ProfessionalServices => Self::technology(), // Similar to tech
491            IndustryType::Construction => Self::manufacturing(), // Similar pattern
492        }
493    }
494
495    /// Get the seasonality multiplier for a given month (0 = January).
496    pub fn seasonality_multiplier(&self, month: u8) -> f64 {
497        self.seasonality[(month % 12) as usize]
498    }
499}
500
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Adds up the component weights of one mixture.
    fn weight_total(mixture: &LogNormalMixtureConfig) -> f64 {
        mixture
            .components
            .iter()
            .map(|component| component.weight)
            .sum()
    }

    #[test]
    fn test_retail_profile() {
        let retail = IndustryAmountProfile::retail();
        assert_eq!(retail.industry, IndustryType::Retail);
        assert!(retail.sales_amounts.validate().is_ok());
        assert!(retail.purchase_amounts.validate().is_ok());
    }

    #[test]
    fn test_manufacturing_profile() {
        let manufacturing = IndustryAmountProfile::manufacturing();
        assert_eq!(manufacturing.industry, IndustryType::Manufacturing);
        assert!(manufacturing.sales_amounts.validate().is_ok());
    }

    #[test]
    fn test_financial_services_profile() {
        let financial = IndustryAmountProfile::financial_services();
        assert_eq!(financial.industry, IndustryType::FinancialServices);
        assert!(financial.sales_amounts.validate().is_ok());
    }

    #[test]
    fn test_healthcare_profile() {
        let healthcare = IndustryAmountProfile::healthcare();
        assert_eq!(healthcare.industry, IndustryType::Healthcare);
        assert!(healthcare.sales_amounts.validate().is_ok());
    }

    #[test]
    fn test_technology_profile() {
        let technology = IndustryAmountProfile::technology();
        assert_eq!(technology.industry, IndustryType::Technology);
        assert!(technology.sales_amounts.validate().is_ok());
    }

    #[test]
    fn test_seasonality() {
        let retail = IndustryAmountProfile::retail();

        // Retail peaks in December (index 11)...
        assert_eq!(retail.seasonality_multiplier(11), 1.75);

        // ...and bottoms out in February (index 1).
        assert_eq!(retail.seasonality_multiplier(1), 0.70);

        // Every month must stay strictly inside (0.5, 2.0).
        for month in 0u8..12 {
            let multiplier = retail.seasonality_multiplier(month);
            assert!(multiplier > 0.5 && multiplier < 2.0);
        }
    }

    #[test]
    fn test_for_industry() {
        let retail = IndustryAmountProfile::for_industry(IndustryType::Retail);
        assert_eq!(retail.industry, IndustryType::Retail);

        let tech = IndustryAmountProfile::for_industry(IndustryType::Technology);
        assert_eq!(tech.industry, IndustryType::Technology);
    }

    #[test]
    fn test_component_weights_sum() {
        let presets = [
            IndustryAmountProfile::retail(),
            IndustryAmountProfile::manufacturing(),
            IndustryAmountProfile::financial_services(),
            IndustryAmountProfile::healthcare(),
            IndustryAmountProfile::technology(),
        ];

        for preset in &presets {
            let sales_sum = weight_total(&preset.sales_amounts);
            assert!(
                (sales_sum - 1.0).abs() < 0.01,
                "Sales weights should sum to 1.0"
            );

            let purchase_sum = weight_total(&preset.purchase_amounts);
            assert!(
                (purchase_sum - 1.0).abs() < 0.01,
                "Purchase weights should sum to 1.0"
            );

            let payroll_sum = weight_total(&preset.payroll_amounts);
            assert!(
                (payroll_sum - 1.0).abs() < 0.01,
                "Payroll weights should sum to 1.0"
            );
        }
    }
}