Skip to main content

datasynth_core/distributions/
weibull.rs

1//! Weibull distribution for time-to-event and duration modeling.
2//!
3//! The Weibull distribution is commonly used for modeling:
4//! - Days-to-payment (accounts receivable aging)
5//! - Time-to-failure (asset depreciation studies)
6//! - Processing times (document flow durations)
7
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use rand_distr::{Distribution, Weibull};
11use serde::{Deserialize, Serialize};
12
13/// Configuration for Weibull distribution.
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct WeibullConfig {
16    /// Shape parameter (k) - controls the shape of the distribution.
17    /// k < 1: decreasing failure rate (early failures more likely)
18    /// k = 1: constant failure rate (exponential distribution)
19    /// k > 1: increasing failure rate (wear-out failures)
20    pub shape: f64,
21    /// Scale parameter (lambda) - controls the characteristic life.
22    /// 63.2% of values will be below this threshold.
23    pub scale: f64,
24    /// Minimum value (shifts the distribution).
25    #[serde(default)]
26    pub min_value: f64,
27    /// Maximum value (clamps output).
28    #[serde(default)]
29    pub max_value: Option<f64>,
30    /// Whether to round to integers (useful for days).
31    #[serde(default)]
32    pub round_to_integer: bool,
33}
34
35impl Default for WeibullConfig {
36    fn default() -> Self {
37        Self {
38            shape: 1.5,  // Increasing failure rate
39            scale: 30.0, // 30 day characteristic time
40            min_value: 0.0,
41            max_value: None,
42            round_to_integer: false,
43        }
44    }
45}
46
47impl WeibullConfig {
48    /// Create a new Weibull configuration.
49    pub fn new(shape: f64, scale: f64) -> Self {
50        Self {
51            shape,
52            scale,
53            ..Default::default()
54        }
55    }
56
57    /// Create a configuration for days-to-payment modeling.
58    pub fn days_to_payment() -> Self {
59        Self {
60            shape: 1.8,             // Slight increasing hazard (more likely to pay as time goes on)
61            scale: 35.0,            // Characteristic payment around 35 days
62            min_value: 1.0,         // At least 1 day
63            max_value: Some(120.0), // Cap at 120 days
64            round_to_integer: true,
65        }
66    }
67
68    /// Create a configuration for early payment behavior.
69    pub fn early_payment() -> Self {
70        Self {
71            shape: 2.5,  // Strong increasing hazard
72            scale: 15.0, // Characteristic payment around 15 days
73            min_value: 1.0,
74            max_value: Some(30.0),
75            round_to_integer: true,
76        }
77    }
78
79    /// Create a configuration for late payment behavior.
80    pub fn late_payment() -> Self {
81        Self {
82            shape: 0.8,      // Decreasing hazard (procrastination)
83            scale: 60.0,     // Characteristic payment around 60 days
84            min_value: 30.0, // Already past due date
85            max_value: Some(180.0),
86            round_to_integer: true,
87        }
88    }
89
90    /// Create a configuration for processing time.
91    pub fn processing_time() -> Self {
92        Self {
93            shape: 2.0,            // Bell-shaped, typical processing time
94            scale: 3.0,            // ~3 hours characteristic time
95            min_value: 0.5,        // At least 30 minutes
96            max_value: Some(24.0), // Cap at 24 hours
97            round_to_integer: false,
98        }
99    }
100
101    /// Create a configuration for asset useful life (years).
102    pub fn asset_useful_life() -> Self {
103        Self {
104            shape: 3.5, // Wear-out failure pattern
105            scale: 7.0, // ~7 year characteristic life
106            min_value: 1.0,
107            max_value: Some(20.0),
108            round_to_integer: true,
109        }
110    }
111
112    /// Validate the configuration.
113    pub fn validate(&self) -> Result<(), String> {
114        if self.shape <= 0.0 {
115            return Err("shape must be positive".to_string());
116        }
117        if self.scale <= 0.0 {
118            return Err("scale must be positive".to_string());
119        }
120        if let Some(max) = self.max_value {
121            if max <= self.min_value {
122                return Err("max_value must be greater than min_value".to_string());
123            }
124        }
125        Ok(())
126    }
127
128    /// Get the expected value (mean) of the distribution.
129    pub fn expected_value(&self) -> f64 {
130        use std::f64::consts::PI;
131
132        // E[X] = scale * Gamma(1 + 1/shape)
133        // Using Stirling approximation for Gamma function
134        let arg = 1.0 + 1.0 / self.shape;
135        let gamma_approx = (2.0 * PI / arg).sqrt() * (arg / std::f64::consts::E).powf(arg);
136        self.min_value + self.scale * gamma_approx
137    }
138
139    /// Get the median of the distribution.
140    pub fn median(&self) -> f64 {
141        self.min_value + self.scale * (2.0_f64.ln()).powf(1.0 / self.shape)
142    }
143
144    /// Get the mode of the distribution.
145    /// Only defined for shape > 1.
146    pub fn mode(&self) -> Option<f64> {
147        if self.shape > 1.0 {
148            let mode = self.scale * ((self.shape - 1.0) / self.shape).powf(1.0 / self.shape);
149            Some(self.min_value + mode)
150        } else {
151            None
152        }
153    }
154}
155
156/// Weibull distribution sampler.
157pub struct WeibullSampler {
158    rng: ChaCha8Rng,
159    config: WeibullConfig,
160    distribution: Weibull<f64>,
161}
162
163impl WeibullSampler {
164    /// Create a new Weibull sampler.
165    pub fn new(seed: u64, config: WeibullConfig) -> Result<Self, String> {
166        config.validate()?;
167
168        let distribution = Weibull::new(config.scale, config.shape)
169            .map_err(|e| format!("Invalid Weibull distribution: {}", e))?;
170
171        Ok(Self {
172            rng: ChaCha8Rng::seed_from_u64(seed),
173            config,
174            distribution,
175        })
176    }
177
178    /// Sample a value from the distribution.
179    pub fn sample(&mut self) -> f64 {
180        let mut value = self.distribution.sample(&mut self.rng) + self.config.min_value;
181
182        // Apply max constraint
183        if let Some(max) = self.config.max_value {
184            value = value.min(max);
185        }
186
187        // Round to integer if configured
188        if self.config.round_to_integer {
189            value = value.round();
190        }
191
192        value
193    }
194
195    /// Sample a value as integer (for days).
196    pub fn sample_days(&mut self) -> u32 {
197        self.sample().max(0.0) as u32
198    }
199
200    /// Sample multiple values.
201    pub fn sample_n(&mut self, n: usize) -> Vec<f64> {
202        (0..n).map(|_| self.sample()).collect()
203    }
204
205    /// Sample multiple values as days.
206    pub fn sample_n_days(&mut self, n: usize) -> Vec<u32> {
207        (0..n).map(|_| self.sample_days()).collect()
208    }
209
210    /// Reset the sampler with a new seed.
211    pub fn reset(&mut self, seed: u64) {
212        self.rng = ChaCha8Rng::seed_from_u64(seed);
213    }
214
215    /// Get the configuration.
216    pub fn config(&self) -> &WeibullConfig {
217        &self.config
218    }
219}
220
221/// Result of survival analysis using Weibull.
222#[derive(Debug, Clone)]
223pub struct WeibullSurvivalResult {
224    /// Time point
225    pub time: f64,
226    /// Survival probability at time t
227    pub survival_probability: f64,
228    /// Hazard rate at time t
229    pub hazard_rate: f64,
230}
231
232impl WeibullConfig {
233    /// Calculate survival probability at time t.
234    pub fn survival_probability(&self, t: f64) -> f64 {
235        if t <= self.min_value {
236            return 1.0;
237        }
238        let adjusted_t = t - self.min_value;
239        (-((adjusted_t / self.scale).powf(self.shape))).exp()
240    }
241
242    /// Calculate hazard rate at time t.
243    pub fn hazard_rate(&self, t: f64) -> f64 {
244        if t <= self.min_value {
245            if self.shape < 1.0 {
246                return f64::INFINITY; // Decreasing hazard starts at infinity
247            }
248            return 0.0;
249        }
250        let adjusted_t = t - self.min_value;
251        (self.shape / self.scale) * (adjusted_t / self.scale).powf(self.shape - 1.0)
252    }
253
254    /// Generate survival analysis data.
255    pub fn survival_analysis(&self, time_points: &[f64]) -> Vec<WeibullSurvivalResult> {
256        time_points
257            .iter()
258            .map(|&t| WeibullSurvivalResult {
259                time: t,
260                survival_probability: self.survival_probability(t),
261                hazard_rate: self.hazard_rate(t),
262            })
263            .collect()
264    }
265}
266
267#[cfg(test)]
268mod tests {
269    use super::*;
270
271    #[test]
272    fn test_weibull_validation() {
273        let config = WeibullConfig::new(1.5, 30.0);
274        assert!(config.validate().is_ok());
275
276        let invalid_shape = WeibullConfig::new(-1.0, 30.0);
277        assert!(invalid_shape.validate().is_err());
278
279        let invalid_scale = WeibullConfig::new(1.5, 0.0);
280        assert!(invalid_scale.validate().is_err());
281    }
282
283    #[test]
284    fn test_weibull_sampling() {
285        let config = WeibullConfig::new(1.5, 30.0);
286        let mut sampler = WeibullSampler::new(42, config).unwrap();
287
288        let samples = sampler.sample_n(1000);
289        assert_eq!(samples.len(), 1000);
290
291        // All samples should be non-negative
292        assert!(samples.iter().all(|&x| x >= 0.0));
293    }
294
295    #[test]
296    fn test_weibull_determinism() {
297        let config = WeibullConfig::new(1.5, 30.0);
298
299        let mut sampler1 = WeibullSampler::new(42, config.clone()).unwrap();
300        let mut sampler2 = WeibullSampler::new(42, config).unwrap();
301
302        for _ in 0..100 {
303            assert_eq!(sampler1.sample(), sampler2.sample());
304        }
305    }
306
307    #[test]
308    fn test_weibull_days_to_payment() {
309        let config = WeibullConfig::days_to_payment();
310        let mut sampler = WeibullSampler::new(42, config.clone()).unwrap();
311
312        let samples = sampler.sample_n_days(1000);
313
314        // All should be at least 1 day and at most 120 days
315        assert!(samples.iter().all(|&x| (1..=120).contains(&x)));
316
317        // Most should be around the characteristic time (35 days)
318        let median_approx = samples.iter().copied().sum::<u32>() as f64 / 1000.0;
319        assert!(median_approx > 20.0 && median_approx < 60.0);
320    }
321
322    #[test]
323    fn test_weibull_median() {
324        let config = WeibullConfig::new(2.0, 30.0);
325        let median = config.median();
326
327        // For k=2, median = scale * sqrt(ln(2)) ≈ 30 * 0.833 ≈ 24.99
328        assert!((median - 24.99).abs() < 0.1);
329    }
330
331    #[test]
332    fn test_weibull_mode() {
333        let config = WeibullConfig::new(2.0, 30.0);
334        let mode = config.mode();
335
336        // For k=2, mode = scale * sqrt((k-1)/k) = 30 * sqrt(0.5) ≈ 21.21
337        assert!(mode.is_some());
338        assert!((mode.unwrap() - 21.21).abs() < 0.1);
339
340        // No mode for k <= 1
341        let no_mode_config = WeibullConfig::new(0.8, 30.0);
342        assert!(no_mode_config.mode().is_none());
343    }
344
345    #[test]
346    fn test_weibull_survival() {
347        let config = WeibullConfig::new(2.0, 30.0);
348
349        // At t=0, survival should be 1.0
350        assert!((config.survival_probability(0.0) - 1.0).abs() < 0.001);
351
352        // At t=median, survival should be 0.5
353        let median = config.median();
354        assert!((config.survival_probability(median) - 0.5).abs() < 0.01);
355
356        // At t→∞, survival should approach 0
357        assert!(config.survival_probability(1000.0) < 0.001);
358    }
359
360    #[test]
361    fn test_weibull_hazard_shapes() {
362        // Decreasing hazard (k < 1)
363        let config_dec = WeibullConfig::new(0.5, 30.0);
364        assert!(config_dec.hazard_rate(10.0) > config_dec.hazard_rate(50.0));
365
366        // Increasing hazard (k > 1)
367        let config_inc = WeibullConfig::new(2.0, 30.0);
368        assert!(config_inc.hazard_rate(10.0) < config_inc.hazard_rate(50.0));
369    }
370
371    #[test]
372    fn test_weibull_presets() {
373        let early = WeibullConfig::early_payment();
374        assert!(early.validate().is_ok());
375
376        let late = WeibullConfig::late_payment();
377        assert!(late.validate().is_ok());
378
379        let processing = WeibullConfig::processing_time();
380        assert!(processing.validate().is_ok());
381
382        let asset = WeibullConfig::asset_useful_life();
383        assert!(asset.validate().is_ok());
384    }
385}