//! Statistical computation utilities for `uncertain_rs` (statistics.rs).

1#![allow(
2    clippy::cast_precision_loss,
3    clippy::cast_possible_truncation,
4    clippy::cast_sign_loss
5)]
6
7use crate::Uncertain;
8use crate::cache;
9use crate::computation::AdaptiveSampling;
10use crate::traits::Shareable;
11use std::cell::RefCell;
12use std::collections::HashMap;
13use std::hash::Hash;
14use std::sync::Arc;
15
/// Lazy statistical computation wrapper that defers expensive operations
/// until they are actually needed and caches intermediate results
#[derive(Debug)]
pub struct LazyStats<T>
where
    T: Shareable,
{
    /// Source of samples for every statistic below.
    uncertain: Arc<Uncertain<T>>,
    /// Number of samples to draw when the sample cache is first filled.
    sample_count: usize,
    /// Raw samples, generated once on first use and reused afterwards.
    samples_cache: RefCell<Option<Vec<T>>>,
    /// Cached mean of the samples.
    mean_cache: RefCell<Option<f64>>,
    /// Cached variance of the samples.
    variance_cache: RefCell<Option<f64>>,
    /// Samples converted to `f64` and sorted; reused by quantile queries.
    sorted_samples_cache: RefCell<Option<Vec<f64>>>,
}
30
31impl<T> LazyStats<T>
32where
33    T: Shareable + Into<f64> + Clone,
34{
35    /// Helper method to implement the lazy computation pattern
36    fn get_or_compute<V, F>(cache: &RefCell<Option<V>>, compute: F) -> V
37    where
38        V: Clone,
39        F: FnOnce() -> V,
40    {
41        let mut cache_ref = cache.borrow_mut();
42        if let Some(value) = cache_ref.as_ref() {
43            value.clone()
44        } else {
45            let computed = compute();
46            *cache_ref = Some(computed.clone());
47            computed
48        }
49    }
50    /// Create a new lazy statistics wrapper
51    #[must_use]
52    pub fn new(uncertain: &Uncertain<T>, sample_count: usize) -> Self {
53        Self {
54            uncertain: Arc::new(uncertain.clone()),
55            sample_count,
56            samples_cache: RefCell::new(None),
57            mean_cache: RefCell::new(None),
58            variance_cache: RefCell::new(None),
59            sorted_samples_cache: RefCell::new(None),
60        }
61    }
62
63    /// Get samples, computing them only once
64    pub fn samples(&self) -> Vec<T> {
65        Self::get_or_compute(&self.samples_cache, || {
66            self.uncertain.take_samples(self.sample_count)
67        })
68    }
69
70    /// Get mean, computing it lazily from cached samples
71    pub fn mean(&self) -> f64 {
72        Self::get_or_compute(&self.mean_cache, || {
73            let samples = self.samples();
74            let sum: f64 = samples.into_iter().map(Into::into).sum();
75            sum / self.sample_count as f64
76        })
77    }
78
79    /// Get variance, computing it lazily from cached samples using numerically stable formula
80    pub fn variance(&self) -> f64 {
81        Self::get_or_compute(&self.variance_cache, || {
82            let samples = self.samples();
83            let mean = self.mean();
84
85            // Use iterator references to avoid cloning and numerically stable variance formula
86            let sum_sq_diff: f64 = samples
87                .iter()
88                .map(|x| {
89                    let diff = Into::<f64>::into(x.clone()) - mean;
90                    diff * diff
91                })
92                .sum();
93
94            sum_sq_diff / self.sample_count as f64
95        })
96    }
97
98    /// Get standard deviation, computing it lazily from variance
99    pub fn std_dev(&self) -> f64 {
100        self.variance().sqrt()
101    }
102
103    /// Get sorted samples for quantile operations, computing them only once
104    fn sorted_samples(&self) -> Vec<f64>
105    where
106        T: PartialOrd,
107    {
108        Self::get_or_compute(&self.sorted_samples_cache, || {
109            let samples = self.samples();
110            let mut sorted: Vec<f64> = samples
111                .iter()
112                .map(|x| Into::<f64>::into(x.clone()))
113                .collect();
114            sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
115            sorted
116        })
117    }
118
119    /// Get quantile using linear interpolation for more accurate results
120    pub fn quantile(&self, q: f64) -> f64
121    where
122        T: PartialOrd,
123    {
124        let samples = self.sorted_samples();
125        if samples.is_empty() {
126            return 0.0;
127        }
128        if samples.len() == 1 {
129            return samples[0];
130        }
131
132        let position = q * (samples.len() - 1) as f64;
133        let lower_idx = position.floor() as usize;
134        let upper_idx = position.ceil() as usize;
135
136        if lower_idx == upper_idx {
137            samples[lower_idx.min(samples.len() - 1)]
138        } else {
139            let lower_val = samples[lower_idx];
140            let upper_val = samples[upper_idx.min(samples.len() - 1)];
141            let weight = position - lower_idx as f64;
142            lower_val + weight * (upper_val - lower_val)
143        }
144    }
145
146    /// Get confidence interval using cached sorted samples
147    pub fn confidence_interval(&self, confidence: f64) -> (f64, f64)
148    where
149        T: PartialOrd,
150    {
151        let samples = self.sorted_samples();
152        let alpha = 1.0 - confidence;
153        let lower_idx = ((alpha / 2.0) * samples.len() as f64) as usize;
154        let upper_idx = (((1.0 - alpha / 2.0) * samples.len() as f64) as usize).saturating_sub(1);
155
156        let lower_idx = lower_idx.min(samples.len() - 1);
157        let upper_idx = upper_idx.min(samples.len() - 1);
158
159        (samples[lower_idx], samples[upper_idx])
160    }
161}
162
/// Progressive statistical computation that builds results incrementally
#[derive(Debug)]
pub struct ProgressiveStats {
    /// Number of samples seen so far.
    count: usize,
    /// Running sum of the samples.
    sum: f64,
    /// Running sum of squared samples (used by `variance`).
    sum_squares: f64,
    /// Smallest sample seen so far (+inf before the first sample).
    min_val: f64,
    /// Largest sample seen so far (-inf before the first sample).
    max_val: f64,
}

impl ProgressiveStats {
    /// Create a new progressive statistics accumulator
    #[must_use]
    pub fn new() -> Self {
        Self {
            count: 0,
            sum: 0.0,
            sum_squares: 0.0,
            min_val: f64::INFINITY,
            max_val: f64::NEG_INFINITY,
        }
    }

    /// Add a sample to the progressive computation
    pub fn add_sample(&mut self, value: f64) {
        self.count += 1;
        self.sum += value;
        self.sum_squares += value * value;
        self.min_val = self.min_val.min(value);
        self.max_val = self.max_val.max(value);
    }

    /// Get the current mean (0.0 when no samples have been added)
    #[must_use]
    pub fn mean(&self) -> f64 {
        if self.count == 0 {
            0.0
        } else {
            self.sum / self.count as f64
        }
    }

    /// Get the current sample variance (divisor `n - 1`; 0.0 for fewer than two samples)
    ///
    /// The result is clamped at 0.0: the running-sums formula
    /// `sum_squares - n * mean^2` can go slightly negative through
    /// floating-point cancellation, which would make `std_dev()` return NaN.
    #[must_use]
    pub fn variance(&self) -> f64 {
        if self.count <= 1 {
            0.0
        } else {
            let mean = self.mean();
            let raw =
                (self.sum_squares - self.count as f64 * mean * mean) / (self.count - 1) as f64;
            raw.max(0.0)
        }
    }

    /// Get the current standard deviation
    #[must_use]
    pub fn std_dev(&self) -> f64 {
        self.variance().sqrt()
    }

    /// Get the current range (`max - min`; 0.0 when no samples have been added)
    #[must_use]
    pub fn range(&self) -> f64 {
        if self.count == 0 {
            0.0
        } else {
            self.max_val - self.min_val
        }
    }

    /// Get the sample count
    #[must_use]
    pub fn count(&self) -> usize {
        self.count
    }
}

impl Default for ProgressiveStats {
    fn default() -> Self {
        Self::new()
    }
}
244
/// Adaptive lazy statistical computation that dynamically determines optimal sample counts
/// for different statistical operations based on convergence criteria
#[derive(Debug)]
pub struct AdaptiveLazyStats<T>
where
    T: Shareable,
{
    /// Source of samples.
    uncertain: Arc<Uncertain<T>>,
    /// Convergence parameters (min/max samples, growth factor, error threshold).
    config: AdaptiveSampling,
    /// Incremental accumulator fed by every sample drawn so far.
    progressive_stats: RefCell<ProgressiveStats>,
    /// Number of samples already consumed by `progressive_stats`.
    current_samples: RefCell<usize>,
    /// Statistics that have already converged, keyed by name ("mean", "variance").
    converged_stats: RefCell<HashMap<String, f64>>,
}
258
259impl<T> AdaptiveLazyStats<T>
260where
261    T: Shareable + Into<f64> + Clone,
262{
263    /// Create a new adaptive lazy statistics wrapper
264    #[must_use]
265    pub fn new(uncertain: &Uncertain<T>, config: &AdaptiveSampling) -> Self {
266        Self {
267            uncertain: Arc::new(uncertain.clone()),
268            config: config.clone(),
269            progressive_stats: RefCell::new(ProgressiveStats::new()),
270            current_samples: RefCell::new(0),
271            converged_stats: RefCell::new(HashMap::new()),
272        }
273    }
274
275    /// Add more samples until we reach the target count or convergence
276    fn ensure_samples(&self, target_samples: usize) {
277        let mut current = self.current_samples.borrow_mut();
278        let mut stats = self.progressive_stats.borrow_mut();
279
280        if *current < target_samples {
281            let additional_samples = target_samples - *current;
282            let samples = self.uncertain.take_samples(additional_samples);
283
284            for sample in samples {
285                stats.add_sample(sample.into());
286            }
287
288            *current = target_samples;
289        }
290    }
291
292    /// Get mean with adaptive convergence
293    pub fn mean(&self) -> f64 {
294        let mut converged = self.converged_stats.borrow_mut();
295
296        if let Some(&cached) = converged.get("mean") {
297            return cached;
298        }
299
300        let mut sample_count = self.config.min_samples;
301        let mut prev_mean = 0.0;
302
303        loop {
304            self.ensure_samples(sample_count);
305            let stats = self.progressive_stats.borrow();
306            let mean = stats.mean();
307
308            if sample_count > self.config.min_samples {
309                let relative_error = if mean == 0.0 {
310                    (mean - prev_mean).abs()
311                } else {
312                    ((mean - prev_mean) / mean).abs()
313                };
314
315                if relative_error < self.config.error_threshold
316                    || sample_count >= self.config.max_samples
317                {
318                    converged.insert("mean".to_string(), mean);
319                    return mean;
320                }
321            }
322
323            prev_mean = mean;
324            sample_count = ((sample_count as f64 * self.config.growth_factor) as usize)
325                .min(self.config.max_samples);
326        }
327    }
328
329    /// Get variance with adaptive convergence
330    pub fn variance(&self) -> f64 {
331        let mut converged = self.converged_stats.borrow_mut();
332
333        if let Some(&cached) = converged.get("variance") {
334            return cached;
335        }
336
337        let mut sample_count = self.config.min_samples;
338        let mut prev_variance = 0.0;
339
340        loop {
341            self.ensure_samples(sample_count);
342            let stats = self.progressive_stats.borrow();
343            let variance = stats.variance();
344
345            if sample_count > self.config.min_samples {
346                let relative_error = if variance == 0.0 {
347                    (variance - prev_variance).abs()
348                } else {
349                    ((variance - prev_variance) / variance).abs()
350                };
351
352                if relative_error < self.config.error_threshold
353                    || sample_count >= self.config.max_samples
354                {
355                    converged.insert("variance".to_string(), variance);
356                    return variance;
357                }
358            }
359
360            prev_variance = variance;
361            sample_count = ((sample_count as f64 * self.config.growth_factor) as usize)
362                .min(self.config.max_samples);
363        }
364    }
365
366    /// Get standard deviation with adaptive convergence
367    pub fn std_dev(&self) -> f64 {
368        self.variance().sqrt()
369    }
370
371    /// Get the current sample count used
372    pub fn sample_count(&self) -> usize {
373        *self.current_samples.borrow()
374    }
375
376    /// Get summary of convergence status
377    #[must_use]
378    pub fn convergence_info(&self) -> HashMap<String, bool> {
379        let converged = self.converged_stats.borrow();
380        let mut info = HashMap::new();
381        info.insert("mean".to_string(), converged.contains_key("mean"));
382        info.insert("variance".to_string(), converged.contains_key("variance"));
383        info
384    }
385}
386
387/// Statistical analysis methods for uncertain values
388impl<T> Uncertain<T>
389where
390    T: Shareable,
391{
392    /// Estimates the mode (most frequent value) of the distribution
393    ///
394    /// # Example
395    /// ```rust
396    /// use uncertain_rs::Uncertain;
397    /// use std::collections::HashMap;
398    ///
399    /// let mut probs = HashMap::new();
400    /// probs.insert("red", 0.5);
401    /// probs.insert("blue", 0.3);
402    /// probs.insert("green", 0.2);
403    ///
404    /// let categorical = Uncertain::categorical(&probs).unwrap();
405    /// let mode = categorical.mode(1000);
406    /// // Should likely be "red"
407    /// ```
408    #[must_use]
409    pub fn mode(&self, sample_count: usize) -> Option<T>
410    where
411        T: Hash + Eq,
412    {
413        let samples = self.take_samples(sample_count);
414        if samples.is_empty() {
415            return None;
416        }
417
418        let mut counts = HashMap::new();
419        for sample in samples {
420            *counts.entry(sample).or_insert(0) += 1;
421        }
422
423        counts
424            .into_iter()
425            .max_by_key(|(_, count)| *count)
426            .map(|(value, _)| value)
427    }
428
429    /// Creates a histogram of the distribution
430    ///
431    /// # Example
432    /// ```rust
433    /// use uncertain_rs::Uncertain;
434    ///
435    /// let bernoulli = Uncertain::bernoulli(0.7);
436    /// let histogram = bernoulli.histogram(1000);
437    /// // Should show roughly 700 true, 300 false
438    /// ```
439    #[must_use]
440    pub fn histogram(&self, sample_count: usize) -> HashMap<T, usize>
441    where
442        T: Hash + Eq,
443    {
444        let samples = self.take_samples(sample_count);
445        let mut histogram = HashMap::new();
446
447        for sample in samples {
448            *histogram.entry(sample).or_insert(0) += 1;
449        }
450
451        histogram
452    }
453
454    /// Calculates the empirical entropy of the distribution in bits
455    ///
456    /// # Example
457    /// ```rust
458    /// use uncertain_rs::Uncertain;
459    ///
460    /// let uniform_coin = Uncertain::bernoulli(0.5);
461    /// let entropy = uniform_coin.entropy(1000);
462    /// // Should be close to 1.0 bit for fair coin
463    /// ```
464    #[must_use]
465    pub fn entropy(&self, sample_count: usize) -> f64
466    where
467        T: Hash + Eq,
468    {
469        let histogram = self.histogram(sample_count);
470        let total = sample_count as f64;
471
472        histogram
473            .values()
474            .map(|&count| {
475                let p = count as f64 / total;
476                if p > 0.0 { -p * p.log2() } else { 0.0 }
477            })
478            .sum()
479    }
480}
481
482/// Lazy evaluation methods for statistical operations
483impl<T> Uncertain<T>
484where
485    T: Shareable + Into<f64> + Clone,
486{
487    /// Create a lazy statistics wrapper that defers expensive computations
488    /// until they are actually needed and reuses intermediate results
489    ///
490    /// **Recommended usage**: When you need multiple statistics from the same distribution,
491    /// use this method to get a `LazyStats` object and call multiple methods on it.
492    /// This provides maximum performance by reusing samples and intermediate computations.
493    ///
494    /// # Example
495    /// ```rust
496    /// use uncertain_rs::Uncertain;
497    ///
498    /// let normal = Uncertain::normal(10.0, 2.0);
499    /// let stats = normal.lazy_stats(1000);
500    ///
501    /// // All of these reuse the same 1000 samples:
502    /// let mean = stats.mean();              // Computes samples once
503    /// let variance = stats.variance();      // Reuses samples and mean
504    /// let std_dev = stats.std_dev();        // Reuses variance
505    /// let median = stats.quantile(0.5);     // Reuses sorted samples
506    /// let (lo, hi) = stats.confidence_interval(0.95); // Reuses sorted samples
507    ///
508    /// // This is much more efficient than calling individual methods:
509    /// // normal.expected_value(1000), normal.variance(1000), etc.
510    /// ```
511    #[must_use]
512    pub fn lazy_stats(&self, sample_count: usize) -> LazyStats<T> {
513        LazyStats::new(self, sample_count)
514    }
515
516    /// Get statistics using lazy evaluation (alias for `lazy_stats`)
517    ///
518    /// This is provided as a more concise alias for users who prefer shorter method names.
519    #[must_use]
520    pub fn stats(&self, sample_count: usize) -> LazyStats<T> {
521        LazyStats::new(self, sample_count)
522    }
523
524    /// Create a progressive statistics accumulator that builds results incrementally
525    /// This is useful when you want to analyze samples as they are generated
526    ///
527    /// # Example
528    /// ```rust
529    /// use uncertain_rs::Uncertain;
530    ///
531    /// let normal = Uncertain::normal(5.0, 1.0);
532    /// let mut progressive = Uncertain::<f64>::progressive_stats();
533    ///
534    /// // Add samples incrementally
535    /// for _ in 0..1000 {
536    ///     progressive.add_sample(normal.sample());
537    /// }
538    ///
539    /// let mean = progressive.mean();
540    /// let variance = progressive.variance();
541    /// ```
542    #[must_use]
543    pub fn progressive_stats() -> ProgressiveStats {
544        ProgressiveStats::new()
545    }
546
547    /// Compute multiple statistics lazily with a single sample generation pass
548    /// This is more efficient than calling individual methods when you need multiple stats
549    ///
550    /// # Example
551    /// ```rust
552    /// use uncertain_rs::Uncertain;
553    ///
554    /// let normal = Uncertain::normal(0.0, 1.0);
555    /// let stats = normal.compute_stats_batch(1000);
556    ///
557    /// println!("Mean: {}, Std Dev: {}, Range: {}",
558    ///          stats.mean(), stats.std_dev(), stats.range());
559    /// ```
560    #[must_use]
561    pub fn compute_stats_batch(&self, sample_count: usize) -> ProgressiveStats
562    where
563        T: Into<f64>,
564    {
565        let mut stats = ProgressiveStats::new();
566        let samples = self.take_samples(sample_count);
567
568        for sample in samples {
569            stats.add_sample(sample.into());
570        }
571
572        stats
573    }
574}
575
576/// Statistical methods for numeric types
577impl<T> Uncertain<T>
578where
579    T: Clone + Send + Sync + Into<f64> + 'static,
580{
581    /// Calculates the expected value (mean) of the distribution
582    ///
583    /// **Note**: For multiple statistical operations on the same distribution,
584    /// use `lazy_stats()` to get a `LazyStats` object for optimal performance
585    /// with sample reuse and caching.
586    ///
587    /// # Example
588    /// ```rust
589    /// use uncertain_rs::Uncertain;
590    ///
591    /// let normal = Uncertain::normal(10.0, 2.0);
592    /// let mean = normal.expected_value(1000);
593    /// // Should be approximately 10.0
594    ///
595    /// // For multiple stats, use lazy_stats for better performance:
596    /// let stats = normal.lazy_stats(1000);
597    /// let mean = stats.mean();
598    /// let variance = stats.variance(); // Reuses same samples
599    /// ```
600    #[must_use]
601    pub fn expected_value(&self, sample_count: usize) -> f64
602    where
603        T: Into<f64>,
604    {
605        let samples: Vec<f64> = self
606            .take_samples(sample_count)
607            .into_iter()
608            .map(Into::into)
609            .collect();
610        samples.iter().sum::<f64>() / sample_count as f64
611    }
612
613    /// Calculates the expected value using adaptive sampling for better efficiency
614    ///
615    /// This method automatically determines the optimal sample count based on
616    /// convergence criteria, potentially improving cache hit rates for similar computations.
617    ///
618    /// # Example
619    /// ```rust
620    /// use uncertain_rs::Uncertain;
621    /// use uncertain_rs::computation::AdaptiveSampling;
622    ///
623    /// let normal = Uncertain::normal(10.0, 2.0);
624    /// let config = AdaptiveSampling::default();
625    /// let mean = normal.expected_value_adaptive(&config);
626    /// // Should be approximately 10.0 with optimal sample count
627    /// ```
628    #[must_use]
629    pub fn expected_value_adaptive(&self, config: &AdaptiveSampling) -> f64 {
630        let mut sample_count = config.min_samples;
631        let mut prev_mean = 0.0;
632
633        loop {
634            let mean = self.expected_value(sample_count);
635
636            if sample_count > config.min_samples {
637                let relative_error = ((mean - prev_mean) / mean).abs();
638                if relative_error < config.error_threshold || sample_count >= config.max_samples {
639                    return mean;
640                }
641            }
642
643            prev_mean = mean;
644            sample_count =
645                ((sample_count as f64 * config.growth_factor) as usize).min(config.max_samples);
646        }
647    }
648
649    /// Adaptive lazy statistical computation that progressively adds samples until convergence
650    /// This provides optimal performance by only computing as many samples as needed
651    ///
652    /// # Example
653    /// ```rust
654    /// use uncertain_rs::Uncertain;
655    /// use uncertain_rs::computation::AdaptiveSampling;
656    ///
657    /// let normal = Uncertain::normal(10.0, 2.0);
658    /// let config = AdaptiveSampling::default();
659    /// let lazy_stats = normal.adaptive_lazy_stats(&config);
660    ///
661    /// // Automatically uses optimal sample count for each statistic
662    /// let mean = lazy_stats.mean();
663    /// let std_dev = lazy_stats.std_dev();
664    /// ```
665    #[must_use]
666    pub fn adaptive_lazy_stats(&self, config: &AdaptiveSampling) -> AdaptiveLazyStats<T> {
667        AdaptiveLazyStats::new(self, config)
668    }
669
670    /// Calculates the variance of the distribution
671    ///
672    /// **Note**: For multiple statistical operations on the same distribution,
673    /// use `lazy_stats()` to get a `LazyStats` object for optimal performance
674    /// with sample reuse and caching.
675    ///
676    /// # Example
677    /// ```rust
678    /// use uncertain_rs::Uncertain;
679    ///
680    /// let normal = Uncertain::normal(0.0, 2.0);
681    /// let variance = normal.variance(1000);
682    /// // Should be approximately 4.0 (std_dev^2)
683    ///
684    /// // For multiple stats, use lazy_stats for better performance:
685    /// let stats = normal.lazy_stats(1000);
686    /// let variance = stats.variance();
687    /// let std_dev = stats.std_dev(); // Reuses variance calculation
688    /// ```
689    #[must_use]
690    pub fn variance(&self, sample_count: usize) -> f64
691    where
692        T: Into<f64>,
693    {
694        let samples: Vec<f64> = self
695            .take_samples(sample_count)
696            .into_iter()
697            .map(Into::into)
698            .collect();
699
700        let mean = samples.iter().sum::<f64>() / sample_count as f64;
701
702        // Use numerically stable variance calculation
703        samples
704            .iter()
705            .map(|x| {
706                let diff = x - mean;
707                diff * diff
708            })
709            .sum::<f64>()
710            / sample_count as f64
711    }
712
713    /// Calculates the standard deviation of the distribution
714    ///
715    /// **Note**: For multiple statistical operations on the same distribution,
716    /// use `lazy_stats()` to get a `LazyStats` object for optimal performance
717    /// with sample reuse and caching.
718    ///
719    /// # Example
720    /// ```rust
721    /// use uncertain_rs::Uncertain;
722    ///
723    /// let normal = Uncertain::normal(0.0, 2.0);
724    /// let std_dev = normal.standard_deviation(1000);
725    /// // Should be approximately 2.0
726    ///
727    /// // For multiple stats, use lazy_stats for better performance:
728    /// let stats = normal.lazy_stats(1000);
729    /// let std_dev = stats.std_dev();
730    /// let variance = stats.variance(); // Reuses same samples
731    /// ```
732    #[must_use]
733    pub fn standard_deviation(&self, sample_count: usize) -> f64
734    where
735        T: Into<f64>,
736    {
737        self.variance(sample_count).sqrt()
738    }
739
740    /// Calculates the skewness of the distribution
741    ///
742    /// This method uses caching to avoid recomputing the same result.
743    ///
744    /// # Example
745    /// ```rust
746    /// use uncertain_rs::Uncertain;
747    ///
748    /// let normal = Uncertain::normal(0.0, 1.0);
749    /// let skewness = normal.skewness(1000);
750    /// // Should be approximately 0 for normal distribution
751    /// ```
752    #[must_use]
753    pub fn skewness(&self, sample_count: usize) -> f64 {
754        cache::stats_cache().get_or_compute_skewness(self.id, sample_count, || {
755            let samples: Vec<f64> = self
756                .take_samples(sample_count)
757                .into_iter()
758                .map(Into::into)
759                .collect();
760
761            let mean = samples.iter().sum::<f64>() / samples.len() as f64;
762            let std_dev = self.standard_deviation(sample_count);
763
764            if std_dev == 0.0 {
765                return 0.0;
766            }
767
768            let n = samples.len() as f64;
769            samples
770                .iter()
771                .map(|x| ((x - mean) / std_dev).powi(3))
772                .sum::<f64>()
773                / n
774        })
775    }
776
777    /// Calculates the excess kurtosis of the distribution
778    ///
779    /// This method uses caching to avoid recomputing the same result.
780    ///
781    /// # Example
782    /// ```rust
783    /// use uncertain_rs::Uncertain;
784    ///
785    /// let normal = Uncertain::normal(0.0, 1.0);
786    /// let kurtosis = normal.kurtosis(1000);
787    /// // Should be approximately 0 for normal distribution (excess kurtosis)
788    /// ```
789    #[must_use]
790    pub fn kurtosis(&self, sample_count: usize) -> f64 {
791        cache::stats_cache().get_or_compute_kurtosis(self.id, sample_count, || {
792            let samples: Vec<f64> = self
793                .take_samples(sample_count)
794                .into_iter()
795                .map(Into::into)
796                .collect();
797
798            let mean = samples.iter().sum::<f64>() / samples.len() as f64;
799            let std_dev = self.standard_deviation(sample_count);
800
801            if std_dev == 0.0 {
802                return 0.0;
803            }
804
805            let n = samples.len() as f64;
806            let kurt = samples
807                .iter()
808                .map(|x| ((x - mean) / std_dev).powi(4))
809                .sum::<f64>()
810                / n;
811
812            kurt - 3.0 // Excess kurtosis (subtract 3 for normal distribution baseline)
813        })
814    }
815}
816
817/// Statistical methods for ordered types
818impl<T> Uncertain<T>
819where
820    T: Clone + Send + Sync + PartialOrd + Into<f64> + 'static,
821{
822    /// Calculates confidence interval bounds
823    ///
824    /// **Note**: For multiple statistical operations on the same distribution,
825    /// use `lazy_stats()` to get a `LazyStats` object for optimal performance
826    /// with sample reuse and caching.
827    ///
828    /// # Example
829    /// ```rust
830    /// use uncertain_rs::Uncertain;
831    ///
832    /// let normal = Uncertain::normal(100.0, 15.0);
833    /// let (lower, upper) = normal.confidence_interval(0.95, 1000);
834    /// // 95% of values should fall between lower and upper
835    ///
836    /// // For multiple stats, use lazy_stats for better performance:
837    /// let stats = normal.lazy_stats(1000);
838    /// let (lower, upper) = stats.confidence_interval(0.95);
839    /// let median = stats.quantile(0.5); // Reuses sorted samples
840    /// ```
841    #[must_use]
842    pub fn confidence_interval(&self, confidence: f64, sample_count: usize) -> (f64, f64)
843    where
844        T: Into<f64> + PartialOrd,
845    {
846        let mut samples: Vec<f64> = self
847            .take_samples(sample_count)
848            .into_iter()
849            .map(Into::into)
850            .collect();
851        samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
852
853        let alpha = 1.0 - confidence;
854        let lower_idx = ((alpha / 2.0) * samples.len() as f64) as usize;
855        let upper_idx = (((1.0 - alpha / 2.0) * samples.len() as f64) as usize).saturating_sub(1);
856
857        let lower_idx = lower_idx.min(samples.len() - 1);
858        let upper_idx = upper_idx.min(samples.len() - 1);
859
860        (samples[lower_idx], samples[upper_idx])
861    }
862
863    /// Estimates the cumulative distribution function (CDF) at a given value
864    ///
865    /// This method uses caching to avoid recomputing the same result.
866    ///
867    /// # Example
868    /// ```rust
869    /// use uncertain_rs::Uncertain;
870    ///
871    /// let normal = Uncertain::normal(0.0, 1.0);
872    /// let prob = normal.cdf(0.0, 1000);
873    /// // Should be approximately 0.5 for standard normal at 0
874    /// ```
875    #[must_use]
876    pub fn cdf(&self, value: f64, sample_count: usize) -> f64 {
877        cache::stats_cache().get_or_compute_cdf(self.id, sample_count, value, || {
878            let samples: Vec<f64> = self
879                .take_samples(sample_count)
880                .into_iter()
881                .map(Into::into)
882                .collect();
883
884            let count = samples.iter().filter(|&&x| x <= value).count();
885            count as f64 / samples.len() as f64
886        })
887    }
888
889    /// Estimates quantiles of the distribution using linear interpolation
890    ///
891    /// **Note**: For multiple statistical operations on the same distribution,
892    /// use `lazy_stats()` to get a `LazyStats` object for optimal performance
893    /// with sample reuse and caching.
894    ///
895    /// # Example
896    /// ```rust
897    /// use uncertain_rs::Uncertain;
898    ///
899    /// let normal = Uncertain::normal(0.0, 1.0);
900    /// let median = normal.quantile(0.5, 1000);
901    /// // Should be approximately 0.0 for standard normal
902    ///
903    /// // For multiple quantiles, use lazy_stats for better performance:
904    /// let stats = normal.lazy_stats(1000);
905    /// let q25 = stats.quantile(0.25);
906    /// let q75 = stats.quantile(0.75); // Reuses sorted samples
907    /// ```
908    #[must_use]
909    pub fn quantile(&self, q: f64, sample_count: usize) -> f64
910    where
911        T: Into<f64> + PartialOrd,
912    {
913        let mut samples: Vec<f64> = self
914            .take_samples(sample_count)
915            .into_iter()
916            .map(Into::into)
917            .collect();
918        samples.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
919
920        if samples.is_empty() {
921            return 0.0;
922        }
923        if samples.len() == 1 {
924            return samples[0];
925        }
926
927        let position = q * (samples.len() - 1) as f64;
928        let lower_idx = position.floor() as usize;
929        let upper_idx = position.ceil() as usize;
930
931        if lower_idx == upper_idx {
932            samples[lower_idx.min(samples.len() - 1)]
933        } else {
934            let lower_val = samples[lower_idx];
935            let upper_val = samples[upper_idx.min(samples.len() - 1)];
936            let weight = position - lower_idx as f64;
937            lower_val + weight * (upper_val - lower_val)
938        }
939    }
940
941    /// Calculates the interquartile range (IQR)
942    ///
943    /// # Example
944    /// ```rust
945    /// use uncertain_rs::Uncertain;
946    ///
947    /// let normal = Uncertain::normal(0.0, 1.0);
948    /// let iqr = normal.interquartile_range(1000);
949    /// // IQR for standard normal is approximately 1.35
950    /// ```
951    #[must_use]
952    pub fn interquartile_range(&self, sample_count: usize) -> f64 {
953        let q75 = self.quantile(0.75, sample_count);
954        let q25 = self.quantile(0.25, sample_count);
955        q75 - q25
956    }
957
958    /// Estimates the median absolute deviation (MAD)
959    ///
960    /// # Panics
961    ///
962    /// Panics if the samples contain values that cannot be compared (e.g., NaN values).
963    ///
964    /// # Example
965    /// ```rust
966    /// use uncertain_rs::Uncertain;
967    ///
968    /// let normal = Uncertain::normal(0.0, 1.0);
969    /// let mad = normal.median_absolute_deviation(1000);
970    /// ```
971    #[must_use]
972    pub fn median_absolute_deviation(&self, sample_count: usize) -> f64 {
973        let samples: Vec<f64> = self
974            .take_samples(sample_count)
975            .into_iter()
976            .map(std::convert::Into::into)
977            .collect();
978
979        let median = self.quantile(0.5, sample_count);
980        let mut deviations: Vec<f64> = samples.iter().map(|x| (x - median).abs()).collect();
981
982        deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
983        let mad_index = (deviations.len() - 1) / 2;
984        deviations[mad_index]
985    }
986}
987
988/// Advanced statistical methods
989impl Uncertain<f64> {
990    /// Estimates the probability density function (PDF) using kernel density estimation
991    ///
992    /// This method uses caching to avoid recomputing the same result.
993    ///
994    /// # Example
995    /// ```rust
996    /// use uncertain_rs::Uncertain;
997    ///
998    /// let normal = Uncertain::normal(0.0, 1.0);
999    /// let density = normal.pdf_kde(0.0, 1000, 0.1);
1000    /// ```
1001    #[must_use]
1002    pub fn pdf_kde(&self, x: f64, sample_count: usize, bandwidth: f64) -> f64 {
1003        cache::dist_cache().get_or_compute_pdf_kde(self.id, sample_count, x, bandwidth, || {
1004            let samples = self.take_samples(sample_count);
1005
1006            let kernel_sum: f64 = samples
1007                .iter()
1008                .map(|&xi| {
1009                    let z = (x - xi) / bandwidth;
1010                    (-0.5 * z * z).exp()
1011                })
1012                .sum();
1013
1014            kernel_sum / (sample_count as f64 * bandwidth * (2.0 * std::f64::consts::PI).sqrt())
1015        })
1016    }
1017
1018    /// Estimates the log-likelihood of a value using kernel density estimation
1019    ///
1020    /// # Example
1021    /// ```rust
1022    /// use uncertain_rs::Uncertain;
1023    ///
1024    /// let normal = Uncertain::normal(0.0, 1.0);
1025    /// let log_likelihood = normal.log_likelihood(0.0, 1000, 0.1);
1026    /// ```
1027    #[must_use]
1028    pub fn log_likelihood(&self, x: f64, sample_count: usize, bandwidth: f64) -> f64 {
1029        let pdf = self.pdf_kde(x, sample_count, bandwidth);
1030        if pdf > 0.0 {
1031            pdf.ln()
1032        } else {
1033            f64::NEG_INFINITY
1034        }
1035    }
1036
1037    /// Estimates correlation with another uncertain value
1038    ///
1039    /// # Example
1040    /// ```rust
1041    /// use uncertain_rs::Uncertain;
1042    ///
1043    /// let x = Uncertain::normal(0.0, 1.0);
1044    /// let y = x.map(|v| v * 2.0 + Uncertain::normal(0.0, 0.5).sample());
1045    /// let correlation = x.correlation(&y, 1000);
1046    /// // Should be positive correlation
1047    /// ```
1048    #[must_use]
1049    pub fn correlation(&self, other: &Uncertain<f64>, sample_count: usize) -> f64 {
1050        let samples_x = self.take_samples(sample_count);
1051        let samples_y = other.take_samples(sample_count);
1052
1053        let mean_x = samples_x.iter().sum::<f64>() / sample_count as f64;
1054        let mean_y = samples_y.iter().sum::<f64>() / sample_count as f64;
1055
1056        let numerator: f64 = samples_x
1057            .iter()
1058            .zip(samples_y.iter())
1059            .map(|(x, y)| (x - mean_x) * (y - mean_y))
1060            .sum();
1061
1062        let var_x: f64 = samples_x.iter().map(|x| (x - mean_x).powi(2)).sum();
1063
1064        let var_y: f64 = samples_y.iter().map(|y| (y - mean_y).powi(2)).sum();
1065
1066        if var_x * var_y > 0.0 {
1067            numerator / (var_x * var_y).sqrt()
1068        } else {
1069            0.0
1070        }
1071    }
1072}
1073
#[cfg(test)]
mod tests {
    // Unit tests for the statistics API. Most tests draw random samples, so
    // assertions use generous tolerances rather than exact comparisons;
    // exact (`f64::EPSILON`) checks appear only where caching or
    // deterministic generators guarantee bitwise-identical results.
    use super::*;
    use std::collections::HashMap;

    #[test]
    fn test_expected_value() {
        let normal = Uncertain::normal(10.0, 1.0);
        let mean = normal.expected_value(1000);
        assert!((mean - 10.0).abs() < 0.2);
    }

    #[test]
    fn test_standard_deviation() {
        let normal = Uncertain::normal(0.0, 2.0);
        let std_dev = normal.standard_deviation(1000);
        assert!((std_dev - 2.0).abs() < 0.3);
    }

    #[test]
    fn test_confidence_interval() {
        let normal = Uncertain::normal(0.0, 1.0);
        let (lower, upper) = normal.confidence_interval(0.95, 1000);

        // For standard normal, 95% CI should be approximately [-1.96, 1.96]
        assert!(lower < -1.5 && lower > -2.5);
        assert!(upper > 1.5 && upper < 2.5);
    }

    #[test]
    fn test_cdf() {
        let normal = Uncertain::normal(0.0, 1.0);
        let prob = normal.cdf(0.0, 1000);

        // For standard normal, P(X <= 0) should be approximately 0.5
        assert!((prob - 0.5).abs() < 0.1);
    }

    #[test]
    fn test_quantile() {
        let normal = Uncertain::normal(0.0, 1.0);
        let median = normal.quantile(0.5, 1000);

        // Median of standard normal should be approximately 0
        assert!(median.abs() < 0.2);
    }

    #[test]
    fn test_mode_categorical() {
        let mut probs = HashMap::new();
        probs.insert("red", 0.6);
        probs.insert("blue", 0.4);

        let categorical = Uncertain::categorical(&probs).unwrap();
        let mode = categorical.mode(1000);

        // Mode should likely be "red" with 60% probability
        assert_eq!(mode, Some("red"));
    }

    #[test]
    fn test_entropy() {
        let fair_coin = Uncertain::bernoulli(0.5);
        let entropy = fair_coin.entropy(1000);

        // Fair coin should have entropy close to 1 bit
        assert!((entropy - 1.0).abs() < 0.1);

        let biased_coin = Uncertain::bernoulli(0.9);
        let biased_entropy = biased_coin.entropy(1000);

        // Biased coin should have lower entropy
        assert!(biased_entropy < entropy);
    }

    #[test]
    fn test_skewness_normal() {
        let normal = Uncertain::normal(0.0, 1.0);
        let skewness = normal.skewness(1000);

        // Normal distribution should have skewness close to 0
        assert!(skewness.abs() < 0.3);
    }

    #[test]
    fn test_kurtosis_normal() {
        let normal = Uncertain::normal(0.0, 1.0);
        let kurtosis = normal.kurtosis(1000);

        // Normal distribution should have excess kurtosis close to 0
        // Allow for some statistical variance in the test
        assert!(kurtosis.abs() < 2.0);
    }

    #[test]
    fn test_mode_empty_samples() {
        // Zero requested samples: mode must be None rather than panic.
        let uncertain = Uncertain::new(|| None::<i32>);
        let mode = uncertain.mode(0);
        assert_eq!(mode, None);
    }

    #[test]
    fn test_mode_integers() {
        let uniform = Uncertain::new(|| if rand::random::<f64>() < 0.7 { 1 } else { 2 });
        let mode = uniform.mode(1000);
        assert_eq!(mode, Some(1));
    }

    #[test]
    fn test_histogram_empty() {
        let uncertain = Uncertain::new(|| 42);
        let histogram = uncertain.histogram(0);
        assert!(histogram.is_empty());
    }

    #[test]
    fn test_histogram_bernoulli() {
        let bernoulli = Uncertain::bernoulli(0.3);
        let histogram = bernoulli.histogram(1000);

        let true_count = histogram.get(&true).copied().unwrap_or(0);
        let false_count = histogram.get(&false).copied().unwrap_or(0);

        // Counts should be near 300/700 and always sum to the sample count.
        assert!(true_count > 200);
        assert!(true_count < 400);
        assert!(false_count > 600);
        assert!(false_count < 800);
        assert_eq!(true_count + false_count, 1000);
    }

    #[test]
    fn test_variance_standalone() {
        let normal = Uncertain::normal(5.0, 3.0);
        let variance = normal.variance(1000);
        assert!((variance - 9.0).abs() < 1.5);
    }

    #[test]
    fn test_variance_zero() {
        let constant = Uncertain::new(|| 42.0);
        let variance = constant.variance(1000);
        assert!(variance < 0.001);
    }

    #[test]
    fn test_skewness_zero_std_dev() {
        // Degenerate (zero-spread) distribution: skewness defined as 0.
        let constant = Uncertain::new(|| 5.0);
        let skewness = constant.skewness(1000);
        assert!(skewness.abs() < f64::EPSILON);
    }

    #[test]
    fn test_kurtosis_zero_std_dev() {
        // Degenerate (zero-spread) distribution: kurtosis defined as 0.
        let constant = Uncertain::new(|| 5.0);
        let kurtosis = constant.kurtosis(1000);
        assert!(kurtosis.abs() < f64::EPSILON);
    }

    #[test]
    fn test_pdf_kde() {
        let normal = Uncertain::normal(0.0, 1.0);
        let density_at_mean = normal.pdf_kde(0.0, 1000, 0.1);
        let density_at_tail = normal.pdf_kde(3.0, 1000, 0.1);

        assert!(density_at_mean > density_at_tail);
        assert!(density_at_mean > 0.0);
        assert!(density_at_tail > 0.0);
    }

    #[test]
    fn test_log_likelihood() {
        let normal = Uncertain::normal(0.0, 1.0);
        let ll_at_mean = normal.log_likelihood(0.0, 1000, 0.1);
        let ll_at_tail = normal.log_likelihood(3.0, 1000, 0.1);

        assert!(ll_at_mean > ll_at_tail);
        assert!(ll_at_mean.is_finite());
    }

    #[test]
    fn test_log_likelihood_zero_pdf() {
        // Point mass far from the query: estimated density underflows to 0,
        // so the log-likelihood must be -infinity.
        let point = Uncertain::new(|| 0.0);
        let ll = point.log_likelihood(10.0, 100, 0.01);
        assert!(ll.is_infinite() && ll.is_sign_negative());
    }

    #[test]
    fn test_correlation_positive() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};

        // Deterministic cycling generators produce perfectly linearly
        // related samples (y = 2x), so the correlation should be exactly +1.
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y_values = vec![2.0, 4.0, 6.0, 8.0, 10.0];

        let x_counter = Arc::new(AtomicUsize::new(0));
        let x = Uncertain::new({
            let values = values.clone();
            let counter = x_counter.clone();
            move || {
                let idx = counter.fetch_add(1, Ordering::SeqCst);
                values[idx % values.len()]
            }
        });

        let y_counter = Arc::new(AtomicUsize::new(0));
        let y = Uncertain::new({
            let values = y_values.clone();
            let counter = y_counter.clone();
            move || {
                let idx = counter.fetch_add(1, Ordering::SeqCst);
                values[idx % values.len()]
            }
        });

        let correlation = x.correlation(&y, 5);
        assert!((correlation - 1.0).abs() < 0.001);
    }

    #[test]
    fn test_correlation_negative() {
        use std::sync::Arc;
        use std::sync::atomic::{AtomicUsize, Ordering};

        // Same construction as above but with y reversed, so the samples are
        // perfectly anti-correlated: expected correlation is exactly -1.
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let y_values = vec![5.0, 4.0, 3.0, 2.0, 1.0];

        let x_counter = Arc::new(AtomicUsize::new(0));
        let x = Uncertain::new({
            let values = values.clone();
            let counter = x_counter.clone();
            move || {
                let idx = counter.fetch_add(1, Ordering::SeqCst);
                values[idx % values.len()]
            }
        });

        let y_counter = Arc::new(AtomicUsize::new(0));
        let y = Uncertain::new({
            let values = y_values.clone();
            let counter = y_counter.clone();
            move || {
                let idx = counter.fetch_add(1, Ordering::SeqCst);
                values[idx % values.len()]
            }
        });

        let correlation = x.correlation(&y, 5);
        assert!((correlation + 1.0).abs() < 0.001);
    }

    #[test]
    fn test_correlation_independent() {
        let x = Uncertain::normal(0.0, 1.0);
        let y = Uncertain::normal(10.0, 1.0);
        let correlation = x.correlation(&y, 1000);

        assert!(correlation.abs() < 0.2);
    }

    #[test]
    fn test_correlation_zero_variance() {
        // A constant has zero variance, so correlation is defined as 0.
        let x = Uncertain::new(|| 5.0);
        let y = Uncertain::normal(0.0, 1.0);
        let correlation = x.correlation(&y, 1000);

        assert!(correlation.abs() < f64::EPSILON);
    }

    #[test]
    fn test_interquartile_range() {
        let normal = Uncertain::normal(0.0, 1.0);
        let iqr = normal.interquartile_range(1000);

        // IQR of a standard normal is ~1.349.
        assert!(iqr > 1.0);
        assert!(iqr < 2.0);
    }

    #[test]
    fn test_median_absolute_deviation() {
        let normal = Uncertain::normal(0.0, 1.0);
        let mad = normal.median_absolute_deviation(1000);

        // MAD of a standard normal is ~0.674.
        assert!(mad > 0.5);
        assert!(mad < 1.0);
    }

    #[test]
    fn test_quantile_extremes() {
        let normal = Uncertain::normal(0.0, 1.0);
        let min_quantile = normal.quantile(0.0, 1000);
        let max_quantile = normal.quantile(1.0, 1000);

        assert!(min_quantile < max_quantile);
        assert!(min_quantile < -2.0);
        assert!(max_quantile > 2.0);
    }

    #[test]
    fn test_cdf_extremes() {
        let normal = Uncertain::normal(0.0, 1.0);
        let prob_low = normal.cdf(-5.0, 1000);
        let prob_high = normal.cdf(5.0, 1000);

        assert!(prob_low < 0.1);
        assert!(prob_high > 0.9);
        assert!(prob_low < prob_high);
    }

    #[test]
    fn test_confidence_interval_different_levels() {
        // Wider confidence level must produce a strictly wider interval.
        let normal = Uncertain::normal(0.0, 1.0);
        let (lower_90, upper_90) = normal.confidence_interval(0.90, 1000);
        let (lower_99, upper_99) = normal.confidence_interval(0.99, 1000);

        assert!(lower_99 < lower_90);
        assert!(upper_99 > upper_90);
        assert!(upper_90 - lower_90 < upper_99 - lower_99);
    }

    #[test]
    fn test_entropy_deterministic() {
        let constant = Uncertain::new(|| "always");
        let entropy = constant.entropy(1000);
        assert!(entropy < 0.01);
    }

    #[test]
    fn test_entropy_maximum() {
        // Uniform over 4 outcomes: maximum entropy of log2(4) = 2 bits.
        let mut probs = HashMap::new();
        probs.insert("a", 0.25);
        probs.insert("b", 0.25);
        probs.insert("c", 0.25);
        probs.insert("d", 0.25);

        let uniform = Uncertain::categorical(&probs).unwrap();
        let entropy = uniform.entropy(2000);

        assert!((entropy - 2.0).abs() < 0.2);
    }

    #[test]
    fn test_mode_tie_handling() {
        // With a 50/50 split either value is an acceptable mode.
        let balanced = Uncertain::new(|| if rand::random::<bool>() { 1 } else { 2 });
        let mode = balanced.mode(1000);
        assert!(mode == Some(1) || mode == Some(2));
    }

    #[test]
    fn test_statistical_consistency() {
        // Mean, median, and an independent empirical mean should all agree
        // for a symmetric distribution.
        let normal = Uncertain::normal(10.0, 2.0);

        let mean = normal.expected_value(2000);
        let median = normal.quantile(0.5, 2000);
        let mode_samples: Vec<f64> = (0..1000).map(|_| normal.sample()).collect();
        let empirical_mean = mode_samples.iter().sum::<f64>() / mode_samples.len() as f64;

        assert!((mean - 10.0).abs() < 0.3);
        assert!((median - 10.0).abs() < 0.3);
        assert!((empirical_mean - 10.0).abs() < 0.3);
        assert!((mean - median).abs() < 0.3);
    }

    // --- LazyStats / ProgressiveStats / adaptive sampling tests ---

    #[test]
    fn test_lazy_stats_basic() {
        let normal = Uncertain::normal(5.0, 2.0);
        let lazy_stats = normal.lazy_stats(1000);

        let mean = lazy_stats.mean();
        assert!((mean - 5.0).abs() < 0.3);

        let variance = lazy_stats.variance();
        assert!((variance - 4.0).abs() < 0.8);

        let std_dev = lazy_stats.std_dev();
        assert!((std_dev - 2.0).abs() < 0.4);
    }

    #[test]
    fn test_lazy_stats_sample_reuse() {
        let normal = Uncertain::normal(0.0, 1.0);
        let lazy_stats = normal.lazy_stats(100);

        // First call should generate samples
        let samples1 = lazy_stats.samples();
        assert_eq!(samples1.len(), 100);

        // Second call should reuse the same samples
        let samples2 = lazy_stats.samples();
        assert_eq!(samples1, samples2);

        // Mean should be computed from the same samples
        let mean1 = lazy_stats.mean();
        let mean2 = lazy_stats.mean();
        assert!((mean1 - mean2).abs() < f64::EPSILON);
    }

    #[test]
    fn test_progressive_stats_basic() {
        let mut progressive = ProgressiveStats::new();

        // Empty accumulator reports zero for count/mean/variance.
        assert_eq!(progressive.count(), 0);
        assert!(progressive.mean().abs() < f64::EPSILON);
        assert!(progressive.variance().abs() < f64::EPSILON);

        progressive.add_sample(1.0);
        progressive.add_sample(2.0);
        progressive.add_sample(3.0);

        assert_eq!(progressive.count(), 3);
        assert!((progressive.mean() - 2.0).abs() < f64::EPSILON);
        assert!((progressive.range() - 2.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_progressive_stats_variance() {
        let mut progressive = ProgressiveStats::new();

        // Add samples with known variance
        let samples = [1.0, 2.0, 3.0, 4.0, 5.0];
        for &sample in &samples {
            progressive.add_sample(sample);
        }

        let expected_mean = 3.0;
        let expected_variance = 2.5; // Sample variance for [1,2,3,4,5]

        assert!((progressive.mean() - expected_mean).abs() < 0.001);
        assert!((progressive.variance() - expected_variance).abs() < 0.001);
        assert!((progressive.std_dev() - expected_variance.sqrt()).abs() < 0.001);
    }

    #[test]
    fn test_compute_stats_batch() {
        let normal = Uncertain::normal(10.0, 2.0);
        let batch_stats = normal.compute_stats_batch(1000);

        assert!(batch_stats.count() == 1000);
        assert!((batch_stats.mean() - 10.0).abs() < 0.3);
        assert!((batch_stats.std_dev() - 2.0).abs() < 0.4);
        assert!(batch_stats.range() > 0.0);
    }

    #[test]
    fn test_adaptive_lazy_stats_convergence() {
        let normal = Uncertain::normal(5.0, 1.0);
        let config = AdaptiveSampling {
            min_samples: 50,
            max_samples: 500,
            error_threshold: 0.05,
            growth_factor: 1.5,
        };

        let adaptive_stats = normal.adaptive_lazy_stats(&config);

        let mean = adaptive_stats.mean();
        assert!((mean - 5.0).abs() < 0.3);

        let std_dev = adaptive_stats.std_dev();
        assert!((std_dev - 1.0).abs() < 0.3);

        // Should have used some number of samples between min and max
        let sample_count = adaptive_stats.sample_count();
        assert!(sample_count >= config.min_samples);
        assert!(sample_count <= config.max_samples);

        // Check convergence info
        let convergence = adaptive_stats.convergence_info();
        assert!(convergence.get("mean").copied().unwrap_or(false));
        assert!(convergence.get("variance").copied().unwrap_or(false));
    }

    #[test]
    fn test_lazy_stats_with_quantiles() {
        let normal = Uncertain::normal(0.0, 1.0);
        let lazy_stats = normal.lazy_stats(1000);

        let median = lazy_stats.quantile(0.5);
        assert!(median.abs() < 0.3); // Should be close to 0 for standard normal

        let q25 = lazy_stats.quantile(0.25);
        let q75 = lazy_stats.quantile(0.75);
        assert!(q25 < median);
        assert!(median < q75);

        let (lower, upper) = lazy_stats.confidence_interval(0.95);
        assert!(lower < upper);
        assert!(lower < median);
        assert!(median < upper);
    }

    #[test]
    fn test_progressive_stats_default() {
        let progressive = ProgressiveStats::default();
        assert_eq!(progressive.count(), 0);
        assert!(progressive.mean().abs() < f64::EPSILON);
    }

    #[test]
    fn test_lazy_stats_debug_and_clone() {
        let normal = Uncertain::normal(1.0, 1.0);
        let lazy_stats = normal.lazy_stats(100);

        let debug_str = format!("{lazy_stats:?}");
        assert!(debug_str.contains("LazyStats"));

        // Test that samples are consistent
        let mean1 = lazy_stats.mean();
        let mean2 = lazy_stats.mean();
        assert!((mean1 - mean2).abs() < f64::EPSILON);
    }

    #[test]
    fn test_adaptive_lazy_stats_early_convergence() {
        // Create a deterministic distribution that should converge quickly
        let constant = Uncertain::new(|| 42.0);
        let config = AdaptiveSampling {
            min_samples: 10,
            max_samples: 1000,
            error_threshold: 0.01,
            growth_factor: 2.0,
        };

        let adaptive_stats = constant.adaptive_lazy_stats(&config);
        let mean = adaptive_stats.mean();

        assert!((mean - 42.0).abs() < 0.01);

        // Should converge early due to deterministic nature
        let sample_count = adaptive_stats.sample_count();
        assert!(sample_count <= 100); // Should not need many samples for constant distribution
    }

    #[test]
    fn test_progressive_stats_edge_cases() {
        let mut progressive = ProgressiveStats::new();

        // Test with single sample
        progressive.add_sample(5.0);
        assert_eq!(progressive.count(), 1);
        assert!((progressive.mean() - 5.0).abs() < f64::EPSILON);
        assert!(progressive.variance().abs() < f64::EPSILON); // Variance is 0 for single sample
        assert!(progressive.range().abs() < f64::EPSILON);

        // Add another sample
        progressive.add_sample(7.0);
        assert_eq!(progressive.count(), 2);
        assert!((progressive.mean() - 6.0).abs() < f64::EPSILON);
        assert!(progressive.variance() > 0.0);
        assert!((progressive.range() - 2.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_caching_behavior() {
        let normal = Uncertain::normal(5.0, 1.0);

        // With the new lazy evaluation design, each method call creates its own LazyStats,
        // so values won't be bitwise identical. However, they should be statistically close.
        let mean1 = normal.expected_value(1000);
        let mean2 = normal.expected_value(1000);
        assert!((mean1 - mean2).abs() < 0.2); // Allow for statistical variation

        let var1 = normal.variance(1000);
        let var2 = normal.variance(1000);
        assert!((var1 - var2).abs() < 0.2); // Allow for statistical variation

        // However, within a single LazyStats object, caching still works:
        let stats = normal.lazy_stats(1000);
        let cached_mean1 = stats.mean();
        let cached_mean2 = stats.mean();
        assert!((cached_mean1 - cached_mean2).abs() < f64::EPSILON);

        let cached_var1 = stats.variance();
        let cached_var2 = stats.variance();
        assert!((cached_var1 - cached_var2).abs() < f64::EPSILON);
    }

    #[test]
    fn test_large_sample_counts() {
        let normal = Uncertain::normal(0.0, 1.0);
        let mean = normal.expected_value(10000);
        let std_dev = normal.standard_deviation(10000);

        assert!((mean - 0.0).abs() < 0.1);
        assert!((std_dev - 1.0).abs() < 0.1);
    }

    #[test]
    fn test_histogram_with_different_types() {
        // Histogram must work for any Hash + Eq sample type, not just numbers.
        let chars = Uncertain::new(|| match rand::random::<u8>() % 3 {
            0 => 'A',
            1 => 'B',
            _ => 'C',
        });

        let histogram = chars.histogram(300);
        assert_eq!(histogram.len(), 3);
        assert!(histogram.contains_key(&'A'));
        assert!(histogram.contains_key(&'B'));
        assert!(histogram.contains_key(&'C'));

        let total: usize = histogram.values().sum();
        assert_eq!(total, 300);
    }
}