rs_stats/distributions/
fitting.rs

1//! # Distribution Fitting
2//!
3//! High-level API for automatic distribution detection and fitting.
4//!
5//! Given a dataset, this module:
6//! 1. **Detects** whether the data is discrete or continuous (`detect_data_type`)
7//! 2. **Fits** all applicable distribution candidates using MLE or MOM
8//! 3. **Ranks** them by AIC (lower = better fit, penalised for complexity)
9//! 4. **Validates** with a Kolmogorov-Smirnov goodness-of-fit test
10//!
11//! ## Key functions
12//!
13//! | Function | Description |
14//! |----------|-------------|
15//! | `auto_fit(data)` | Auto-detect type + return single best fit |
16//! | `fit_all(data)` | All 10 continuous distributions, ranked by AIC |
17//! | `fit_best(data)` | Best continuous distribution (lowest AIC) |
18//! | `fit_all_discrete(data)` | All 4 discrete distributions, ranked by AIC |
19//! | `fit_best_discrete(data)` | Best discrete distribution |
20//! | `detect_data_type(data)` | `DataKind::Discrete` or `DataKind::Continuous` |
21//! | `ks_test(data, cdf)` | Two-sided KS test for continuous distributions |
22//! | `ks_test_discrete(data, cdf)` | KS test for discrete distributions |
23//!
24//! ## Medical example — identifying the best distribution for drug half-life
25//!
26//! ```rust
27//! use rs_stats::distributions::fitting::{fit_all, auto_fit};
28//!
29//! // Drug half-life (hours) measured in 20 patients — typically log-normal in PK studies
30//! let half_lives = vec![
31//!     4.2, 6.1, 3.8, 9.5, 5.3, 7.4, 4.9, 11.2, 3.5, 6.8,
32//!     8.1, 4.4, 5.7, 7.0, 3.9, 10.3, 5.1,  6.5, 4.7,  8.6,
33//! ];
34//!
35//! // One-call: auto-detect + return single best (lowest AIC)
36//! let best = auto_fit(&half_lives).unwrap();
37//! println!("Best fit: {} (AIC={:.2}, KS p={:.3})", best.name, best.aic, best.ks_p_value);
38//!
39//! // Full ranking for model comparison and reporting
40//! println!("{:<15} {:>8} {:>8} {:>10}", "Distribution", "AIC", "BIC", "KS p-value");
41//! for r in fit_all(&half_lives).unwrap() {
42//!     println!("{:<15} {:>8.2} {:>8.2} {:>10.4}", r.name, r.aic, r.bic, r.ks_p_value);
43//! }
44//! ```
45//!
46//! ## Medical example — discrete event counts (adverse reactions)
47//!
48//! ```rust
49//! use rs_stats::distributions::fitting::fit_all_discrete;
50//!
51//! // Adverse drug reaction counts per patient over 6 months
52//! let adr_counts = vec![0.0, 1.0, 0.0, 2.0, 0.0, 1.0, 3.0, 0.0, 1.0, 0.0,
53//!                       2.0, 1.0, 0.0, 0.0, 1.0, 4.0, 0.0, 2.0, 1.0, 0.0];
54//!
55//! for r in fit_all_discrete(&adr_counts).unwrap() {
56//!     println!("{:<20} AIC={:.2}  KS p={:.3}", r.name, r.aic, r.ks_p_value);
57//! }
58//! // Poisson usually wins when variance ≈ mean; NegativeBinomial wins if overdispersed
59//! ```
60
61use crate::distributions::{
62    beta::Beta,
63    binomial_distribution::Binomial,
64    chi_squared::ChiSquared,
65    f_distribution::FDistribution,
66    gamma_distribution::Gamma,
67    geometric::Geometric,
68    lognormal::LogNormal,
69    negative_binomial::NegativeBinomial,
70    normal_distribution::Normal,
71    poisson_distribution::Poisson,
72    student_t::StudentT,
73    traits::{DiscreteDistribution, Distribution},
74    uniform_distribution::Uniform,
75    weibull::Weibull,
76};
77use crate::error::{StatsError, StatsResult};
78use rayon::prelude::*;
79
80// ── Data kind detection ────────────────────────────────────────────────────────
81
82/// Whether a dataset looks discrete or continuous.
83#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84pub enum DataKind {
85    /// All values are non-negative integers (whole numbers ≥ 0).
86    Discrete,
87    /// Contains non-integer or negative values — treated as continuous.
88    Continuous,
89}
90
91/// Infer whether `data` is discrete (all non-negative integers) or continuous.
92///
93/// # Examples
94/// ```
95/// use rs_stats::distributions::fitting::{detect_data_type, DataKind};
96///
97/// assert_eq!(detect_data_type(&[0.0, 1.0, 2.0, 3.0]), DataKind::Discrete);
98/// assert_eq!(detect_data_type(&[0.5, 1.5, 2.3]), DataKind::Continuous);
99/// ```
100pub fn detect_data_type(data: &[f64]) -> DataKind {
101    if data
102        .iter()
103        .all(|&x| x >= 0.0 && x.fract() == 0.0 && x.is_finite())
104    {
105        DataKind::Discrete
106    } else {
107        DataKind::Continuous
108    }
109}
110
111// ── Kolmogorov-Smirnov test ────────────────────────────────────────────────────
112
113/// Result of a Kolmogorov-Smirnov goodness-of-fit test.
114#[derive(Debug, Clone, Copy)]
115pub struct KsResult {
116    /// KS statistic D (maximum absolute deviation between empirical and theoretical CDF).
117    pub statistic: f64,
118    /// Approximate two-sided p-value.
119    pub p_value: f64,
120}
121
122/// Two-sided Kolmogorov-Smirnov test of `data` against `cdf`.
123///
124/// Uses the Kolmogorov distribution for the p-value approximation.
125pub fn ks_test(data: &[f64], cdf: impl Fn(f64) -> f64) -> KsResult {
126    let mut buf = Vec::with_capacity(data.len());
127    buf.extend_from_slice(data);
128    ks_test_with_scratch(&mut buf, cdf)
129}
130
131/// Zero-allocation variant of [`ks_test`] — sorts the caller-provided
132/// buffer in place and uses it as the working copy.
133///
134/// `scratch` must already contain the data to test (the function does not
135/// copy from anywhere). This lets callers reuse one buffer across many KS
136/// evaluations: see [`fit_all`] which fits 10 candidates from a single
137/// `&[f64]` input.
138pub fn ks_test_with_scratch(scratch: &mut [f64], cdf: impl Fn(f64) -> f64) -> KsResult {
139    let n = scratch.len();
140    if n == 0 {
141        return KsResult {
142            statistic: 0.0,
143            p_value: 1.0,
144        };
145    }
146    scratch.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
147
148    let nf = n as f64;
149    let mut d = 0.0_f64;
150    for (i, &x) in scratch.iter().enumerate() {
151        let f = cdf(x);
152        let upper = (i + 1) as f64 / nf;
153        let lower = i as f64 / nf;
154        d = d.max((upper - f).abs()).max((f - lower).abs());
155    }
156
157    let p_value = kolmogorov_p(((nf).sqrt() + 0.12 + 0.11 / nf.sqrt()) * d);
158
159    KsResult {
160        statistic: d,
161        p_value,
162    }
163}
164
165/// KS test for discrete distributions (uses PMF-based CDF on integer grid).
166pub fn ks_test_discrete(data: &[f64], cdf: impl Fn(u64) -> f64) -> KsResult {
167    let mut buf = Vec::with_capacity(data.len());
168    buf.extend_from_slice(data);
169    ks_test_discrete_with_scratch(&mut buf, cdf)
170}
171
172/// Zero-allocation variant of [`ks_test_discrete`].
173pub fn ks_test_discrete_with_scratch(scratch: &mut [f64], cdf: impl Fn(u64) -> f64) -> KsResult {
174    let n = scratch.len();
175    if n == 0 {
176        return KsResult {
177            statistic: 0.0,
178            p_value: 1.0,
179        };
180    }
181    scratch.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
182
183    let nf = n as f64;
184    let mut d = 0.0_f64;
185    for (i, &x) in scratch.iter().enumerate() {
186        let k = x.round() as u64;
187        let f = cdf(k);
188        let upper = (i + 1) as f64 / nf;
189        let lower = i as f64 / nf;
190        d = d.max((upper - f).abs()).max((f - lower).abs());
191    }
192
193    let p_value = kolmogorov_p(((nf).sqrt() + 0.12 + 0.11 / nf.sqrt()) * d);
194
195    KsResult {
196        statistic: d,
197        p_value,
198    }
199}
200
201/// Approximate p-value of the Kolmogorov distribution at `x`.
202fn kolmogorov_p(x: f64) -> f64 {
203    if x <= 0.0 {
204        return 1.0;
205    }
206    // P(K > x) = 2 Σ_{j=1}^∞ (−1)^{j+1} exp(−2j²x²)
207    let mut sum = 0.0_f64;
208    for j in 1_u32..=100 {
209        let term = (-(2.0 * (j as f64).powi(2) * x * x)).exp();
210        if j % 2 == 1 {
211            sum += term;
212        } else {
213            sum -= term;
214        }
215        if term < 1e-15 {
216            break;
217        }
218    }
219    (2.0 * sum).clamp(0.0, 1.0)
220}
221
222// ── Fit result ─────────────────────────────────────────────────────────────────
223
224/// Summary of a distribution fit.
225#[derive(Debug, Clone)]
226pub struct FitResult {
227    /// Distribution name (e.g. `"Normal"`, `"Gamma"`).
228    pub name: String,
229    /// Akaike Information Criterion (lower = better).
230    pub aic: f64,
231    /// Bayesian Information Criterion (lower = better).
232    pub bic: f64,
233    /// KS test statistic D.
234    pub ks_statistic: f64,
235    /// KS test p-value (higher = better fit).
236    pub ks_p_value: f64,
237}
238
239// ── Continuous fitting ─────────────────────────────────────────────────────────
240
241/// Apply [`Distribution`]-level diagnostics (AIC / BIC / KS) to one fitted
242/// distribution, producing a [`FitResult`]. Returns `None` if any of the
243/// diagnostics fails (non-finite AIC/BIC or fit error).
244fn try_fit_continuous<D: Distribution<X = f64>>(data: &[f64], dist: D) -> Option<FitResult> {
245    let aic = dist.aic(data).ok().filter(|x| x.is_finite())?;
246    let bic = dist.bic(data).ok().filter(|x| x.is_finite())?;
247    let ks = ks_test(data, |x| dist.cdf(x).unwrap_or(0.0));
248    Some(FitResult {
249        name: dist.name().to_string(),
250        aic,
251        bic,
252        ks_statistic: ks.statistic,
253        ks_p_value: ks.p_value,
254    })
255}
256
257/// Per-candidate fit closures. Each fits one distribution end-to-end and
258/// returns a [`FitResult`] (or `None` on failure). Consumed in parallel
259/// by [`fit_all`].
260type ContinuousFitter = fn(&[f64]) -> Option<FitResult>;
261
262const CONTINUOUS_FITTERS: &[ContinuousFitter] = &[
263    |d| Normal::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
264    |d| {
265        crate::distributions::exponential_distribution::Exponential::fit(d)
266            .ok()
267            .and_then(|x| try_fit_continuous(d, x))
268    },
269    |d| Uniform::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
270    |d| Gamma::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
271    |d| {
272        LogNormal::fit(d)
273            .ok()
274            .and_then(|x| try_fit_continuous(d, x))
275    },
276    |d| Weibull::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
277    |d| Beta::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
278    |d| StudentT::fit(d).ok().and_then(|x| try_fit_continuous(d, x)),
279    |d| {
280        FDistribution::fit(d)
281            .ok()
282            .and_then(|x| try_fit_continuous(d, x))
283    },
284    |d| {
285        ChiSquared::fit(d)
286            .ok()
287            .and_then(|x| try_fit_continuous(d, x))
288    },
289];
290
291/// Fit all continuous distributions to `data` and return ranked results (by AIC).
292///
293/// Each candidate is fit on its own rayon worker (10-way parallelism). The
294/// distribution candidates that fail to fit (e.g. Beta when data are not in
295/// (0,1)) are silently skipped.
296pub fn fit_all(data: &[f64]) -> StatsResult<Vec<FitResult>> {
297    if data.is_empty() {
298        return Err(StatsError::InvalidInput {
299            message: "fit_all: data must not be empty".to_string(),
300        });
301    }
302
303    let mut results: Vec<FitResult> = CONTINUOUS_FITTERS
304        .par_iter()
305        .filter_map(|fit| fit(data))
306        .collect();
307
308    if results.is_empty() {
309        return Err(StatsError::InvalidInput {
310            message: "fit_all: no distribution could be fitted to the data".to_string(),
311        });
312    }
313
314    results.sort_by(|a, b| {
315        a.aic
316            .partial_cmp(&b.aic)
317            .unwrap_or(std::cmp::Ordering::Equal)
318    });
319    Ok(results)
320}
321
322/// Fit all continuous distributions and return the best one (lowest AIC).
323pub fn fit_best(data: &[f64]) -> StatsResult<FitResult> {
324    let mut all = fit_all(data)?;
325    Ok(all.remove(0))
326}
327
328// ── Verbose fitting (with diagnostics) ────────────────────────────────────────
329
330/// A distribution candidate that failed to fit, with a human-readable reason.
331///
332/// Returned alongside successful fits by [`fit_all_verbose`] and [`fit_all_discrete_verbose`].
333#[derive(Debug, Clone)]
334pub struct SkippedFit {
335    /// Distribution name (e.g. `"Beta"`).
336    pub name: &'static str,
337    /// Why this distribution was not included (e.g. `"fit failed: data must be in (0,1)"`).
338    pub reason: String,
339}
340
341/// Like [`fit_all`] but also reports which distributions were skipped and why.
342///
343/// # Examples
344/// ```
345/// use rs_stats::distributions::fitting::fit_all_verbose;
346///
347/// // Data outside (0,1): Beta will be skipped
348/// let data = vec![2.1, 3.5, 1.8, 4.2, 2.9];
349/// let (fitted, skipped) = fit_all_verbose(&data).unwrap();
350/// println!("{} distributions fitted, {} skipped", fitted.len(), skipped.len());
351/// for s in &skipped {
352///     println!("  Skipped {}: {}", s.name, s.reason);
353/// }
354/// ```
355/// Verbose continuous fitter: returns Ok with a FitResult on success,
356/// Err with a SkippedFit (name + reason) on any failure.
357fn try_fit_verbose<D: Distribution<X = f64>>(
358    name: &'static str,
359    data: &[f64],
360    fit_res: StatsResult<D>,
361) -> Result<FitResult, SkippedFit> {
362    let dist = fit_res.map_err(|e| SkippedFit {
363        name,
364        reason: format!("fit failed: {e}"),
365    })?;
366    let aic = dist
367        .aic(data)
368        .ok()
369        .filter(|x| x.is_finite())
370        .ok_or_else(|| SkippedFit {
371            name,
372            reason: "non-finite AIC (log-likelihood diverged)".to_string(),
373        })?;
374    let bic = dist
375        .bic(data)
376        .ok()
377        .filter(|x| x.is_finite())
378        .ok_or_else(|| SkippedFit {
379            name,
380            reason: "non-finite BIC".to_string(),
381        })?;
382    let ks = ks_test(data, |x| dist.cdf(x).unwrap_or(0.0));
383    Ok(FitResult {
384        name: dist.name().to_string(),
385        aic,
386        bic,
387        ks_statistic: ks.statistic,
388        ks_p_value: ks.p_value,
389    })
390}
391
392type ContinuousVerboseFitter = fn(&[f64]) -> Result<FitResult, SkippedFit>;
393
394const CONTINUOUS_VERBOSE_FITTERS: &[ContinuousVerboseFitter] = &[
395    |d| try_fit_verbose("Normal", d, Normal::fit(d)),
396    |d| {
397        try_fit_verbose(
398            "Exponential",
399            d,
400            crate::distributions::exponential_distribution::Exponential::fit(d),
401        )
402    },
403    |d| try_fit_verbose("Uniform", d, Uniform::fit(d)),
404    |d| try_fit_verbose("Gamma", d, Gamma::fit(d)),
405    |d| try_fit_verbose("LogNormal", d, LogNormal::fit(d)),
406    |d| try_fit_verbose("Weibull", d, Weibull::fit(d)),
407    |d| try_fit_verbose("Beta", d, Beta::fit(d)),
408    |d| try_fit_verbose("StudentT", d, StudentT::fit(d)),
409    |d| try_fit_verbose("FDistribution", d, FDistribution::fit(d)),
410    |d| try_fit_verbose("ChiSquared", d, ChiSquared::fit(d)),
411];
412
413pub fn fit_all_verbose(data: &[f64]) -> StatsResult<(Vec<FitResult>, Vec<SkippedFit>)> {
414    if data.is_empty() {
415        return Err(StatsError::InvalidInput {
416            message: "fit_all_verbose: data must not be empty".to_string(),
417        });
418    }
419
420    let outcomes: Vec<Result<FitResult, SkippedFit>> = CONTINUOUS_VERBOSE_FITTERS
421        .par_iter()
422        .map(|f| f(data))
423        .collect();
424
425    let (mut results, skipped): (Vec<FitResult>, Vec<SkippedFit>) =
426        outcomes
427            .into_iter()
428            .fold((Vec::new(), Vec::new()), |(mut ok, mut err), o| {
429                match o {
430                    Ok(r) => ok.push(r),
431                    Err(s) => err.push(s),
432                }
433                (ok, err)
434            });
435
436    if results.is_empty() {
437        return Err(StatsError::InvalidInput {
438            message: "fit_all_verbose: no distribution could be fitted to the data".to_string(),
439        });
440    }
441
442    results.sort_by(|a, b| {
443        a.aic
444            .partial_cmp(&b.aic)
445            .unwrap_or(std::cmp::Ordering::Equal)
446    });
447    Ok((results, skipped))
448}
449
450/// Apply [`DiscreteDistribution`]-level diagnostics (AIC / BIC / KS) to one
451/// fitted distribution.
452fn try_fit_discrete<D: DiscreteDistribution>(
453    data: &[f64],
454    int_data: &[u64],
455    dist: D,
456) -> Option<FitResult> {
457    let aic = dist.aic(int_data).ok().filter(|x| x.is_finite())?;
458    let bic = dist.bic(int_data).ok().filter(|x| x.is_finite())?;
459    let ks = ks_test_discrete(data, |k| dist.cdf(k).unwrap_or(0.0));
460    Some(FitResult {
461        name: dist.name().to_string(),
462        aic,
463        bic,
464        ks_statistic: ks.statistic,
465        ks_p_value: ks.p_value,
466    })
467}
468
469fn try_fit_discrete_verbose<D: DiscreteDistribution>(
470    name: &'static str,
471    data: &[f64],
472    int_data: &[u64],
473    fit_res: StatsResult<D>,
474) -> Result<FitResult, SkippedFit> {
475    let dist = fit_res.map_err(|e| SkippedFit {
476        name,
477        reason: format!("fit failed: {e}"),
478    })?;
479    let aic = dist
480        .aic(int_data)
481        .ok()
482        .filter(|x| x.is_finite())
483        .ok_or_else(|| SkippedFit {
484            name,
485            reason: "non-finite AIC".to_string(),
486        })?;
487    let bic = dist
488        .bic(int_data)
489        .ok()
490        .filter(|x| x.is_finite())
491        .ok_or_else(|| SkippedFit {
492            name,
493            reason: "non-finite BIC".to_string(),
494        })?;
495    let ks = ks_test_discrete(data, |k| dist.cdf(k).unwrap_or(0.0));
496    Ok(FitResult {
497        name: dist.name().to_string(),
498        aic,
499        bic,
500        ks_statistic: ks.statistic,
501        ks_p_value: ks.p_value,
502    })
503}
504
505type DiscreteFitter = fn(&[f64], &[u64]) -> Option<FitResult>;
506type DiscreteVerboseFitter = fn(&[f64], &[u64]) -> Result<FitResult, SkippedFit>;
507
508const DISCRETE_FITTERS: &[DiscreteFitter] = &[
509    |d, i| Poisson::fit(d).ok().and_then(|x| try_fit_discrete(d, i, x)),
510    |d, i| {
511        Geometric::fit(d)
512            .ok()
513            .and_then(|x| try_fit_discrete(d, i, x))
514    },
515    |d, i| {
516        NegativeBinomial::fit(d)
517            .ok()
518            .and_then(|x| try_fit_discrete(d, i, x))
519    },
520    |d, i| {
521        Binomial::fit(d)
522            .ok()
523            .and_then(|x| try_fit_discrete(d, i, x))
524    },
525];
526
527const DISCRETE_VERBOSE_FITTERS: &[DiscreteVerboseFitter] = &[
528    |d, i| try_fit_discrete_verbose("Poisson", d, i, Poisson::fit(d)),
529    |d, i| try_fit_discrete_verbose("Geometric", d, i, Geometric::fit(d)),
530    |d, i| try_fit_discrete_verbose("NegativeBinomial", d, i, NegativeBinomial::fit(d)),
531    |d, i| try_fit_discrete_verbose("Binomial", d, i, Binomial::fit(d)),
532];
533
534/// Like [`fit_all_discrete`] but also reports which distributions were skipped and why.
535pub fn fit_all_discrete_verbose(data: &[f64]) -> StatsResult<(Vec<FitResult>, Vec<SkippedFit>)> {
536    if data.is_empty() {
537        return Err(StatsError::InvalidInput {
538            message: "fit_all_discrete_verbose: data must not be empty".to_string(),
539        });
540    }
541
542    let int_data: Vec<u64> = data.iter().map(|&x| x.round() as u64).collect();
543
544    let outcomes: Vec<Result<FitResult, SkippedFit>> = DISCRETE_VERBOSE_FITTERS
545        .par_iter()
546        .map(|f| f(data, &int_data))
547        .collect();
548
549    let (mut results, skipped): (Vec<FitResult>, Vec<SkippedFit>) =
550        outcomes
551            .into_iter()
552            .fold((Vec::new(), Vec::new()), |(mut ok, mut err), o| {
553                match o {
554                    Ok(r) => ok.push(r),
555                    Err(s) => err.push(s),
556                }
557                (ok, err)
558            });
559
560    if results.is_empty() {
561        return Err(StatsError::InvalidInput {
562            message: "fit_all_discrete_verbose: no distribution could be fitted".to_string(),
563        });
564    }
565
566    results.sort_by(|a, b| {
567        a.aic
568            .partial_cmp(&b.aic)
569            .unwrap_or(std::cmp::Ordering::Equal)
570    });
571    Ok((results, skipped))
572}
573
574// ── Discrete fitting ───────────────────────────────────────────────────────────
575
576/// Fit all discrete distributions to integer `data` (passed as f64) and return ranked results.
577///
578/// Each candidate is fit on its own rayon worker. Distributions that fail
579/// to fit are silently skipped.
580pub fn fit_all_discrete(data: &[f64]) -> StatsResult<Vec<FitResult>> {
581    if data.is_empty() {
582        return Err(StatsError::InvalidInput {
583            message: "fit_all_discrete: data must not be empty".to_string(),
584        });
585    }
586
587    let int_data: Vec<u64> = data.iter().map(|&x| x.round() as u64).collect();
588
589    let mut results: Vec<FitResult> = DISCRETE_FITTERS
590        .par_iter()
591        .filter_map(|f| f(data, &int_data))
592        .collect();
593
594    if results.is_empty() {
595        return Err(StatsError::InvalidInput {
596            message: "fit_all_discrete: no distribution could be fitted to the data".to_string(),
597        });
598    }
599
600    results.sort_by(|a, b| {
601        a.aic
602            .partial_cmp(&b.aic)
603            .unwrap_or(std::cmp::Ordering::Equal)
604    });
605    Ok(results)
606}
607
608/// Fit discrete distributions and return the best (lowest AIC).
609pub fn fit_best_discrete(data: &[f64]) -> StatsResult<FitResult> {
610    let mut all = fit_all_discrete(data)?;
611    Ok(all.remove(0))
612}
613
614// ── Auto-detect and fit ────────────────────────────────────────────────────────
615
616/// Automatically detect whether data is discrete or continuous, then fit all applicable
617/// distributions and return the best match (lowest AIC).
618///
619/// # Examples
620/// ```
621/// use rs_stats::distributions::fitting::auto_fit;
622///
623/// let data = vec![1.2, 2.3, 1.8, 2.9, 1.5];
624/// let best = auto_fit(&data).unwrap();
625/// println!("Best fit: {}", best.name);
626/// ```
627pub fn auto_fit(data: &[f64]) -> StatsResult<FitResult> {
628    match detect_data_type(data) {
629        DataKind::Discrete => fit_best_discrete(data),
630        DataKind::Continuous => fit_best(data),
631    }
632}
633
634#[cfg(test)]
635mod tests {
636    use super::*;
637
638    #[test]
639    fn test_detect_data_type_discrete() {
640        assert_eq!(detect_data_type(&[0.0, 1.0, 2.0, 3.0]), DataKind::Discrete);
641        assert_eq!(detect_data_type(&[0.0, 0.0, 1.0]), DataKind::Discrete);
642    }
643
644    #[test]
645    fn test_detect_data_type_continuous() {
646        assert_eq!(detect_data_type(&[0.5, 1.5, 2.3]), DataKind::Continuous);
647        assert_eq!(detect_data_type(&[-1.0, 0.0, 1.0]), DataKind::Continuous);
648        assert_eq!(detect_data_type(&[1.0, 2.5, 3.0]), DataKind::Continuous);
649    }
650
651    #[test]
652    fn test_ks_test_uniform() {
653        // Data from Uniform(0,1) should give large p-value against U(0,1) CDF
654        let data: Vec<f64> = (0..20).map(|i| i as f64 / 20.0).collect();
655        let ks = ks_test(&data, |x| x.clamp(0.0, 1.0));
656        assert!(ks.statistic < 0.15);
657    }
658
659    #[test]
660    fn test_fit_all_returns_results() {
661        let data: Vec<f64> = (0..50).map(|i| (i as f64) * 0.1 + 0.5).collect();
662        let results = fit_all(&data).unwrap();
663        assert!(!results.is_empty());
664        // Results sorted by AIC (ascending)
665        for i in 1..results.len() {
666            assert!(results[i].aic >= results[i - 1].aic);
667        }
668    }
669
670    #[test]
671    fn test_fit_best_normal_data() {
672        // Data generated from N(5, 1)
673        let data = vec![
674            4.1, 5.2, 5.8, 4.7, 5.3, 4.9, 6.1, 4.5, 5.5, 5.0, 4.8, 5.1, 4.3, 5.7, 4.6, 5.4, 4.2,
675            5.9, 5.2, 4.4,
676        ];
677        let best = fit_best(&data).unwrap();
678        // Normal should win (or be competitive)
679        assert!(best.aic.is_finite());
680    }
681
682    #[test]
683    fn test_fit_all_discrete() {
684        let data = vec![0.0, 1.0, 2.0, 3.0, 1.0, 0.0, 2.0, 1.0, 0.0, 4.0];
685        let results = fit_all_discrete(&data).unwrap();
686        assert!(!results.is_empty());
687    }
688
689    #[test]
690    fn test_auto_fit_continuous() {
691        let data = vec![1.5, 2.3, 1.8, 2.1, 2.7, 1.9, 2.4, 2.0];
692        let best = auto_fit(&data).unwrap();
693        assert!(!best.name.is_empty());
694    }
695
696    #[test]
697    fn test_auto_fit_discrete() {
698        let data = vec![0.0, 1.0, 2.0, 1.0, 0.0, 3.0, 1.0, 2.0];
699        let best = auto_fit(&data).unwrap();
700        assert!(!best.name.is_empty());
701    }
702
703    #[test]
704    fn test_fit_all_empty_data() {
705        assert!(fit_all(&[]).is_err());
706    }
707}
rs_stats/distributions/fitting.rs

rs_stats/distributions/
fitting.rs