// oxilean_std/probability/types.rs

//! Auto-generated module
//!
//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)

use super::functions::*;
/// Bernoulli random variable X ~ Bernoulli(p).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct BernoulliRV {
    pub p: f64,
}
#[allow(dead_code)]
impl BernoulliRV {
    /// Constructs the variable; panics unless `p` lies in [0, 1].
    pub fn new(p: f64) -> Self {
        assert!((0.0..=1.0).contains(&p), "p must be in [0,1]");
        Self { p }
    }
    /// Mean = p.
    pub fn mean(&self) -> f64 {
        self.p
    }
    /// Variance = p*(1-p).
    pub fn variance(&self) -> f64 {
        self.p * (1.0 - self.p)
    }
    /// Entropy H(X) = -p log p - (1-p) log(1-p).
    pub fn entropy(&self) -> f64 {
        // Convention: 0·ln 0 = 0, so degenerate p ∈ {0, 1} gives zero entropy.
        let term = |w: f64| if w > 0.0 { -w * w.ln() } else { 0.0 };
        term(self.p) + term(1.0 - self.p)
    }
    /// PMF: P(X=k) for k in {0,1}; zero for any other k.
    pub fn pmf(&self, k: u8) -> f64 {
        if k == 0 {
            1.0 - self.p
        } else if k == 1 {
            self.p
        } else {
            0.0
        }
    }
    /// Moment generating function M(t) = 1 - p + p*e^t.
    pub fn mgf(&self, t: f64) -> f64 {
        let q = 1.0 - self.p;
        q + self.p * t.exp()
    }
    /// Probability generating function G(z) = 1 - p + p*z.
    pub fn pgf(&self, z: f64) -> f64 {
        let q = 1.0 - self.p;
        q + self.p * z
    }
}
/// Large deviations rate function.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct LargeDeviations {
    pub sequence_name: String,
    pub rate_function: String,
    pub is_good: bool,
}
#[allow(dead_code)]
impl LargeDeviations {
    /// Cramér's theorem for i.i.d. random variables.
    pub fn cramer(rv_name: &str) -> Self {
        LargeDeviations {
            sequence_name: format!("({} i.i.d.)", rv_name),
            rate_function: String::from("Legendre-Fenchel transform of log-mgf"),
            is_good: true,
        }
    }
    /// Sanov's theorem for empirical measures.
    pub fn sanov() -> Self {
        LargeDeviations {
            sequence_name: String::from("empirical measures"),
            rate_function: String::from("relative entropy KL(Q||P)"),
            is_good: true,
        }
    }
    /// Human-readable statement that the LDP holds.
    pub fn ldp_description(&self) -> String {
        format!(
            "LDP for {} with good rate function: {}",
            self.sequence_name, self.rate_function
        )
    }
}
/// Numerical evaluation of characteristic functions via finite-sum approximation.
///
/// For a discrete distribution with PMF `pmf`, computes
/// φ(t) = Σ_k p_k · exp(i t k).
pub struct CharacteristicFunction {
    /// PMF values over support {0, 1, …, n-1}.
    pub pmf: Vec<f64>,
}
impl CharacteristicFunction {
    /// Creates a `CharacteristicFunction` from a PMF.
    pub fn new(pmf: Vec<f64>) -> Self {
        CharacteristicFunction { pmf }
    }
    /// Evaluates Re(φ(t)) = Σ_k p_k cos(t·k).
    pub fn real_part(&self, t: f64) -> f64 {
        self.pmf
            .iter()
            .enumerate()
            .map(|(k, &p)| p * (t * k as f64).cos())
            .sum()
    }
    /// Evaluates Im(φ(t)) = Σ_k p_k sin(t·k).
    pub fn imag_part(&self, t: f64) -> f64 {
        self.pmf
            .iter()
            .enumerate()
            .map(|(k, &p)| p * (t * k as f64).sin())
            .sum()
    }
    /// Returns |φ(t)|².
    pub fn modulus_sq(&self, t: f64) -> f64 {
        let re = self.real_part(t);
        let im = self.imag_part(t);
        re * re + im * im
    }
    /// Computes the k-th raw moment E[X^k] = Σ_x p_x · x^k directly from the PMF.
    ///
    /// This equals i^{-k} φ^(k)(0), but it is evaluated as an exact finite sum
    /// over the support — NOT by numerical differentiation of φ (the previous
    /// doc comment was misleading) — so it is stable for any k.
    pub fn moment(&self, k: u32) -> f64 {
        self.pmf
            .iter()
            .enumerate()
            .map(|(x, &p)| p * (x as f64).powi(k as i32))
            .sum()
    }
}
/// Stopping time data.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct StoppingTime {
    pub name: String,
    pub filtration: String,
    pub is_finite_as: bool,
}
#[allow(dead_code)]
impl StoppingTime {
    /// First hitting time of the named set, adapted to `filtration`.
    pub fn first_hitting(set_name: &str, filtration: &str) -> Self {
        let name = format!("tau_{{{}}}", set_name);
        StoppingTime {
            name,
            filtration: filtration.to_string(),
            is_finite_as: false,
        }
    }
    /// Optional stopping theorem: E[M_tau] = E[M_0] under UI conditions.
    pub fn optional_stopping_description(&self) -> String {
        format!(
            "Optional stopping at {} (filtration {}): E[M_tau] = E[M_0] under UI",
            self.name, self.filtration
        )
    }
}
161/// Exponential distribution with rate parameter λ.
162#[allow(dead_code)]
163pub struct ExponentialDistribution {
164    /// Rate parameter λ > 0.
165    pub lambda: f64,
166}
167#[allow(dead_code)]
168impl ExponentialDistribution {
169    /// Creates an `ExponentialDistribution` with rate λ.
170    pub fn new(lambda: f64) -> Self {
171        ExponentialDistribution { lambda }
172    }
173    /// Probability density function f(x; λ) = λ e^{-λx} for x ≥ 0.
174    pub fn pdf(&self, x: f64) -> f64 {
175        exponential_pdf(x, self.lambda)
176    }
177    /// Cumulative distribution function F(x; λ) = 1 - e^{-λx} for x ≥ 0.
178    pub fn cdf(&self, x: f64) -> f64 {
179        exponential_cdf(x, self.lambda)
180    }
181    /// Mean E[X] = 1/λ.
182    pub fn mean(&self) -> f64 {
183        1.0 / self.lambda
184    }
185    /// Variance Var[X] = 1/λ².
186    pub fn variance(&self) -> f64 {
187        1.0 / (self.lambda * self.lambda)
188    }
189    /// Inverse CDF (quantile function): F^{-1}(p) = -ln(1-p) / λ.
190    pub fn quantile(&self, p: f64) -> f64 {
191        if p <= 0.0 {
192            return 0.0;
193        }
194        if p >= 1.0 {
195            return f64::INFINITY;
196        }
197        -(1.0 - p).ln() / self.lambda
198    }
199    /// Draws a sample using the inverse-CDF method given uniform u ∈ (0,1).
200    pub fn sample(&self, u: f64) -> f64 {
201        self.quantile(u)
202    }
203    /// Moment generating function M_X(t) = λ/(λ-t) for t < λ.
204    pub fn mgf(&self, t: f64) -> f64 {
205        if t >= self.lambda {
206            return f64::INFINITY;
207        }
208        self.lambda / (self.lambda - t)
209    }
210}
/// Hawkes self-exciting point process with exponential kernel.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct HawkesProcess {
    pub base_intensity: f64,
    pub self_excitation: f64,
    pub decay_rate: f64,
    pub is_stationary: bool,
}
#[allow(dead_code)]
impl HawkesProcess {
    /// Builds a process with baseline `mu`, excitation `alpha`, decay `beta`.
    /// Stationarity requires alpha < beta.
    pub fn new(mu: f64, alpha: f64, beta: f64) -> Self {
        let is_stationary = alpha < beta;
        HawkesProcess {
            base_intensity: mu,
            self_excitation: alpha,
            decay_rate: beta,
            is_stationary,
        }
    }
    /// Intensity at time `t` given the most recent event at `last_event`;
    /// before (or at) the last event, only the baseline applies.
    pub fn conditional_intensity(&self, t: f64, last_event: f64) -> f64 {
        if t <= last_event {
            return self.base_intensity;
        }
        self.base_intensity
            + self.self_excitation * (-(self.decay_rate * (t - last_event))).exp()
    }
    /// Stationary mean intensity μ/(1 - α/β); +∞ when non-stationary.
    pub fn mean_intensity(&self) -> f64 {
        match self.is_stationary {
            true => self.base_intensity / (1.0 - self.self_excitation / self.decay_rate),
            false => f64::INFINITY,
        }
    }
    /// Branching ratio α/β.
    pub fn branching_ratio(&self) -> f64 {
        self.self_excitation / self.decay_rate
    }
}
/// Linear congruential generator with Knuth's MMIX multiplier/increment
/// (modulus 2^64 via wrapping arithmetic).
///
/// NOTE: the previous doc comment claimed "Park–Miller parameters", but
/// Park–Miller uses multiplier 16807 and modulus 2^31 - 1; the constants
/// below are Knuth's MMIX LCG.
pub struct Lcg {
    state: u64,
}
impl Lcg {
    /// Knuth's MMIX multiplier.
    const MULTIPLIER: u64 = 6_364_136_223_846_793_005;
    /// Knuth's MMIX increment.
    const INCREMENT: u64 = 1_442_695_040_888_963_407;
    /// Creates an LCG seeded with `seed`.
    pub fn new(seed: u64) -> Self {
        Lcg { state: seed }
    }
    /// Advances the state one step and returns it (shared by both outputs,
    /// replacing the previously duplicated update code).
    fn step(&mut self) -> u64 {
        self.state = self
            .state
            .wrapping_mul(Self::MULTIPLIER)
            .wrapping_add(Self::INCREMENT);
        self.state
    }
    /// Returns the next pseudo-random `f64` in `[0, 1)`, taking the top
    /// 53 bits of the state (the width of an f64 mantissa).
    pub fn next_f64(&mut self) -> f64 {
        (self.step() >> 11) as f64 / (1u64 << 53) as f64
    }
    /// Returns the next pseudo-random `u64`.
    pub fn next_u64(&mut self) -> u64 {
        self.step()
    }
}
274/// Kernel density estimator using a Gaussian kernel.
275///
276/// For a dataset x_1, …, x_n, the KDE at point x is:
277/// f̂(x) = (1/(n·h)) Σ_i K((x - x_i)/h)  where K is the standard Gaussian kernel.
278#[allow(dead_code)]
279pub struct KernelDensityEstimator {
280    /// Training data points.
281    pub data: Vec<f64>,
282    /// Bandwidth h (Silverman's rule of thumb by default).
283    pub bandwidth: f64,
284}
285#[allow(dead_code)]
286impl KernelDensityEstimator {
287    /// Creates a KDE with Silverman's rule-of-thumb bandwidth:
288    /// h = 1.06 · σ̂ · n^{-1/5}.
289    pub fn new(data: Vec<f64>) -> Self {
290        let n = data.len();
291        let bandwidth = if n < 2 {
292            1.0
293        } else {
294            let sigma = sample_variance(&data).sqrt();
295            1.06 * sigma * (n as f64).powf(-0.2)
296        };
297        KernelDensityEstimator { data, bandwidth }
298    }
299    /// Creates a KDE with an explicit bandwidth.
300    pub fn with_bandwidth(data: Vec<f64>, bandwidth: f64) -> Self {
301        KernelDensityEstimator { data, bandwidth }
302    }
303    /// Evaluates the kernel density estimate at point `x`.
304    pub fn density(&self, x: f64) -> f64 {
305        let n = self.data.len();
306        if n == 0 || self.bandwidth <= 0.0 {
307            return 0.0;
308        }
309        let sum: f64 = self
310            .data
311            .iter()
312            .map(|&xi| normal_pdf((x - xi) / self.bandwidth, 0.0, 1.0))
313            .sum();
314        sum / (n as f64 * self.bandwidth)
315    }
316    /// Evaluates the KDE over a grid of `m` equally spaced points in [lo, hi].
317    pub fn density_grid(&self, lo: f64, hi: f64, m: usize) -> Vec<(f64, f64)> {
318        if m == 0 || lo >= hi {
319            return vec![];
320        }
321        (0..m)
322            .map(|i| {
323                let x = lo + (hi - lo) * i as f64 / (m - 1).max(1) as f64;
324                (x, self.density(x))
325            })
326            .collect()
327    }
328}
/// Geometric random variable X ~ Geom(p) (number of trials until first success).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct GeometricRV {
    pub p: f64,
}
#[allow(dead_code)]
impl GeometricRV {
    /// Constructs the variable; panics unless `p` lies in (0, 1].
    pub fn new(p: f64) -> Self {
        assert!(p > 0.0 && p <= 1.0, "p must be in (0,1]");
        Self { p }
    }
    /// Mean = 1/p.
    pub fn mean(&self) -> f64 {
        1.0 / self.p
    }
    /// Variance = (1-p)/p^2.
    pub fn variance(&self) -> f64 {
        (1.0 - self.p) / (self.p * self.p)
    }
    /// PMF: P(X=k) = (1-p)^(k-1) * p for k >= 1; 0 for k = 0.
    pub fn pmf(&self, k: u64) -> f64 {
        if k == 0 {
            return 0.0;
        }
        let q = 1.0 - self.p;
        // `powi` takes an i32; the old `k as i32` cast silently wrapped for
        // k > i32::MAX. Fall back to `powf` for exponents that don't fit.
        match i32::try_from(k - 1) {
            Ok(e) => q.powi(e) * self.p,
            Err(_) => q.powf((k - 1) as f64) * self.p,
        }
    }
    /// CDF: P(X <= k) = 1 - (1-p)^k.
    pub fn cdf(&self, k: u64) -> f64 {
        let q = 1.0 - self.p;
        // Same overflow guard as `pmf`.
        match i32::try_from(k) {
            Ok(e) => 1.0 - q.powi(e),
            Err(_) => 1.0 - q.powf(k as f64),
        }
    }
}
/// Renewal process summarized by its inter-arrival distribution.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct RenewalProcess {
    pub inter_arrival_distribution: String,
    pub mean_inter_arrival: f64,
    pub variance_inter_arrival: f64,
    pub rate: f64,
}
#[allow(dead_code)]
impl RenewalProcess {
    /// Generic renewal process; the long-run rate is 1/mean.
    pub fn new(dist: &str, mean: f64, var: f64) -> Self {
        let rate = 1.0 / mean;
        RenewalProcess {
            inter_arrival_distribution: dist.to_string(),
            mean_inter_arrival: mean,
            variance_inter_arrival: var,
            rate,
        }
    }
    /// Poisson process as the Exp(λ) special case.
    pub fn poisson_process(lambda: f64) -> Self {
        RenewalProcess {
            inter_arrival_distribution: format!("Exp({:.3})", lambda),
            mean_inter_arrival: 1.0 / lambda,
            variance_inter_arrival: 1.0 / (lambda * lambda),
            rate: lambda,
        }
    }
    /// Statement of the elementary renewal theorem for this process.
    pub fn elementary_renewal_theorem(&self) -> String {
        format!(
            "Elementary renewal: E[N(t)]/t → 1/μ = {:.4} as t→∞ (μ={:.3})",
            self.rate, self.mean_inter_arrival
        )
    }
    /// Renewal-reward theorem: long-run reward rate = E[reward]/E[cycle].
    pub fn renewal_reward_theorem(&self, reward_rate: f64) -> f64 {
        reward_rate / self.mean_inter_arrival
    }
    /// Statement of Blackwell's renewal theorem for this process.
    pub fn blackwell_renewal_theorem(&self) -> String {
        format!(
            "Blackwell: E[N(t+h) - N(t)] → h/{:.3} as t→∞ for non-arithmetic dist",
            self.mean_inter_arrival
        )
    }
}
/// Minimal Gaussian-process record (scalar mean and one kernel parameter).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct GaussianProcess2 {
    pub mean: f64,
    pub kernel_param: f64,
    pub num_sample_paths: usize,
}
#[allow(dead_code)]
impl GaussianProcess2 {
    /// Builds the record with no sample paths drawn yet.
    pub fn new(mean: f64, kp: f64) -> Self {
        GaussianProcess2 {
            mean,
            kernel_param: kp,
            num_sample_paths: 0,
        }
    }
    /// Statement of the Kolmogorov continuity criterion for GP sample paths.
    pub fn sample_path_continuity(&self) -> String {
        String::from(
            "By Kolmogorov: GP sample paths are Hölder continuous if covariance kernel is smooth enough",
        )
    }
}
424/// Dirichlet distribution Dir(alpha) — multivariate generalization of Beta.
425#[allow(dead_code)]
426#[derive(Debug, Clone)]
427pub struct DirichletRV {
428    pub alpha: Vec<f64>,
429}
430#[allow(dead_code)]
431impl DirichletRV {
432    pub fn new(alpha: Vec<f64>) -> Self {
433        for &a in &alpha {
434            assert!(a > 0.0, "alpha components must be positive");
435        }
436        Self { alpha }
437    }
438    /// Concentration parameter alpha_0 = sum of alpha.
439    pub fn alpha_0(&self) -> f64 {
440        self.alpha.iter().sum()
441    }
442    /// Mean vector: mu_i = alpha_i / alpha_0.
443    pub fn mean(&self) -> Vec<f64> {
444        let a0 = self.alpha_0();
445        self.alpha.iter().map(|&a| a / a0).collect()
446    }
447    /// Variance of i-th component: alpha_i*(alpha_0-alpha_i) / (alpha_0^2*(alpha_0+1)).
448    pub fn variance_i(&self, i: usize) -> f64 {
449        let a0 = self.alpha_0();
450        self.alpha[i] * (a0 - self.alpha[i]) / (a0 * a0 * (a0 + 1.0))
451    }
452    /// Entropy: log B(alpha) + (alpha_0 - K)*digamma(alpha_0) - sum((alpha_i-1)*digamma(alpha_i))
453    /// Approximated here using Stirling's digamma: digamma(x) ≈ ln(x) - 1/(2x).
454    pub fn entropy_approx(&self) -> f64 {
455        let a0 = self.alpha_0();
456        let k = self.alpha.len() as f64;
457        let digamma_approx = |x: f64| x.ln() - 0.5 / x;
458        let log_b: f64 =
459            self.alpha.iter().map(|&a| lgamma_approx(a)).sum::<f64>() - lgamma_approx(a0);
460        let rest: f64 = (a0 - k) * digamma_approx(a0)
461            - self
462                .alpha
463                .iter()
464                .map(|&a| (a - 1.0) * digamma_approx(a))
465                .sum::<f64>();
466        log_b + rest
467    }
468}
/// Covariance kernel families for Gaussian-process models
/// (evaluated by `GaussianProcess::kernel_value` in this module).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum CovarianceKernel {
    /// RBF kernel: variance · exp(-d² / (2·length_scale²)).
    SquaredExponential { length_scale: f64, variance: f64 },
    /// Matern kernel with smoothness `nu` (only nu = 1/2 and 3/2 are
    /// evaluated exactly by `kernel_value`; other nu fall back to nu = 1/2).
    Matern { nu: f64, length_scale: f64 },
    /// Polynomial kernel: (d + offset)^degree.
    Polynomial { degree: usize, offset: f64 },
    /// Linear kernel (evaluated as the raw distance argument).
    Linear,
    /// Periodic kernel: exp(-2·sin²(π·d/period) / length_scale²).
    Periodic { period: f64, length_scale: f64 },
}
/// Frechet distribution: X ~ Frechet(alpha, s, m).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct FrechetRV {
    pub alpha: f64,
    pub s: f64,
    pub m: f64,
}
#[allow(dead_code)]
impl FrechetRV {
    /// Constructs the distribution; panics unless alpha > 0 and s > 0.
    pub fn new(alpha: f64, s: f64, m: f64) -> Self {
        assert!(alpha > 0.0 && s > 0.0, "alpha, s must be positive");
        Self { alpha, s, m }
    }
    /// CDF: F(x) = exp(-(s/(x-m))^alpha) for x > m; 0 at or below the location m.
    pub fn cdf(&self, x: f64) -> f64 {
        if x <= self.m {
            0.0
        } else {
            (-(self.s / (x - self.m)).powf(self.alpha)).exp()
        }
    }
    /// Mode = m + s*(alpha/(alpha+1))^(1/alpha).
    pub fn mode(&self) -> f64 {
        let ratio = self.alpha / (self.alpha + 1.0);
        self.m + self.s * ratio.powf(1.0 / self.alpha)
    }
}
/// Copula struct for bivariate dependence modeling.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub enum CopulaKind {
    /// Gaussian copula, parameterized by the correlation `rho`.
    Gaussian { rho: f64 },
    /// Clayton (Archimedean) copula with parameter `theta`.
    Clayton { theta: f64 },
    /// Gumbel (Archimedean) copula with parameter `theta`.
    Gumbel { theta: f64 },
    /// Frank (Archimedean) copula with parameter `theta`.
    Frank { theta: f64 },
    /// Independence copula C(u, v) = u·v (no parameters).
    Independence,
}
/// Negative Binomial random variable X ~ NB(r, p).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct NegativeBinomialRV {
    pub r: u32,
    pub p: f64,
}
#[allow(dead_code)]
impl NegativeBinomialRV {
    /// Constructs the variable; panics unless r > 0 and p ∈ (0, 1].
    pub fn new(r: u32, p: f64) -> Self {
        assert!(r > 0, "r must be positive");
        assert!(p > 0.0 && p <= 1.0, "p must be in (0,1]");
        Self { r, p }
    }
    /// Mean = r*(1-p)/p.
    pub fn mean(&self) -> f64 {
        let q = 1.0 - self.p;
        self.r as f64 * q / self.p
    }
    /// Variance = r*(1-p)/p^2.
    pub fn variance(&self) -> f64 {
        let q = 1.0 - self.p;
        self.r as f64 * q / (self.p * self.p)
    }
}
537#[allow(dead_code)]
538#[derive(Debug, Clone)]
539pub struct GaussianProcess {
540    pub mean_function: String,
541    pub covariance_kernel: CovarianceKernel,
542    pub input_dim: usize,
543    pub is_stationary: bool,
544}
545#[allow(dead_code)]
546impl GaussianProcess {
547    pub fn with_sq_exp(length: f64, var: f64, input_dim: usize) -> Self {
548        GaussianProcess {
549            mean_function: "zero".to_string(),
550            covariance_kernel: CovarianceKernel::SquaredExponential {
551                length_scale: length,
552                variance: var,
553            },
554            input_dim,
555            is_stationary: true,
556        }
557    }
558    pub fn with_matern(nu: f64, length: f64, input_dim: usize) -> Self {
559        GaussianProcess {
560            mean_function: "zero".to_string(),
561            covariance_kernel: CovarianceKernel::Matern {
562                nu,
563                length_scale: length,
564            },
565            input_dim,
566            is_stationary: true,
567        }
568    }
569    pub fn kernel_value(&self, d: f64) -> f64 {
570        match &self.covariance_kernel {
571            CovarianceKernel::SquaredExponential {
572                length_scale,
573                variance,
574            } => variance * (-(d * d) / (2.0 * length_scale * length_scale)).exp(),
575            CovarianceKernel::Matern { nu, length_scale } => {
576                let r = d / length_scale;
577                if *nu == 0.5 {
578                    (-r).exp()
579                } else if *nu == 1.5 {
580                    (1.0 + 3.0_f64.sqrt() * r) * (-(3.0_f64.sqrt() * r)).exp()
581                } else {
582                    (-r).exp()
583                }
584            }
585            CovarianceKernel::Linear => d,
586            CovarianceKernel::Polynomial { degree, offset } => (d + offset).powi(*degree as i32),
587            CovarianceKernel::Periodic {
588                period,
589                length_scale,
590            } => {
591                let arg = std::f64::consts::PI * d / period;
592                (-2.0 * arg.sin().powi(2) / (length_scale * length_scale)).exp()
593            }
594        }
595    }
596    pub fn posterior_description(&self, n_obs: usize) -> String {
597        format!(
598            "GP posterior: Gaussian with updated mean/covariance after {} observations",
599            n_obs
600        )
601    }
602    pub fn mercer_representation(&self) -> String {
603        "Mercer's theorem: k(x,y) = Σ λ_i φ_i(x)φ_i(y) (eigendecomposition of kernel operator)"
604            .to_string()
605    }
606}
/// Concentration bounds for sums of independent bounded random variables.
pub struct ConcentrationBound;
impl ConcentrationBound {
    /// Hoeffding's inequality: returns the upper bound on P(S_n - E[S_n] ≥ t)
    /// for n summands each bounded in [a_i, b_i].
    ///
    /// Bound: exp(-2t² / Σ(b_i - a_i)²). Returns 0 for degenerate intervals
    /// (all zero-width), where the sum is deterministic.
    pub fn hoeffding(t: f64, intervals: &[(f64, f64)]) -> f64 {
        let sum_sq: f64 = intervals.iter().map(|(a, b)| (b - a).powi(2)).sum();
        match sum_sq > 0.0 {
            true => (-2.0 * t * t / sum_sq).exp(),
            false => 0.0,
        }
    }
    /// Markov inequality: P(X ≥ a) ≤ E[X] / a for non-negative X.
    /// Non-positive `a` yields the trivial bound 1.
    pub fn markov(expectation: f64, a: f64) -> f64 {
        if a > 0.0 {
            (expectation / a).min(1.0)
        } else {
            1.0
        }
    }
    /// Chebyshev inequality: P(|X - μ| ≥ k·σ) ≤ 1/k².
    /// Non-positive `k` yields the trivial bound 1.
    pub fn chebyshev(k: f64) -> f64 {
        if k > 0.0 {
            (1.0 / (k * k)).min(1.0)
        } else {
            1.0
        }
    }
    /// Chernoff bound for the sum of Bernoulli(p_i) variables with mean μ.
    ///
    /// Upper tail: P(X ≥ (1+δ)μ) ≤ (e^δ / (1+δ)^(1+δ))^μ.
    /// Non-positive δ or μ yields the trivial bound 1.
    pub fn chernoff_upper(mu: f64, delta: f64) -> f64 {
        if delta <= 0.0 || mu <= 0.0 {
            return 1.0;
        }
        let exponent = mu * (delta - (1.0 + delta) * (1.0 + delta).ln());
        exponent.exp().min(1.0)
    }
    /// Bernstein inequality for bounded random variables with variance s².
    ///
    /// P(S_n ≥ t) ≤ exp(-t² / (2(s² + ct/3))) where c is the bound on individual terms.
    pub fn bernstein(t: f64, variance_sum: f64, c: f64) -> f64 {
        let denom = 2.0 * (variance_sum + c * t / 3.0);
        if denom > 0.0 {
            (-t * t / denom).exp().min(1.0)
        } else {
            1.0
        }
    }
    /// Sub-Gaussian tail bound: P(X ≥ t) ≤ exp(-t²/(2σ²)) for σ-sub-Gaussian X.
    /// Non-positive σ yields the trivial bound 1.
    pub fn sub_gaussian_tail(t: f64, sigma: f64) -> f64 {
        if sigma > 0.0 {
            (-t * t / (2.0 * sigma * sigma)).exp().min(1.0)
        } else {
            1.0
        }
    }
}
/// Gumbel distribution: X = Gumbel(mu, beta), used in extreme value theory.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct GumbelRV {
    pub mu: f64,
    pub beta: f64,
}
#[allow(dead_code)]
impl GumbelRV {
    /// Euler–Mascheroni constant (same truncated value the mean formula
    /// previously hard-coded inline).
    const EULER_GAMMA: f64 = 0.5772156649;
    /// Constructs the distribution; panics unless beta > 0.
    pub fn new(mu: f64, beta: f64) -> Self {
        assert!(beta > 0.0, "beta must be positive");
        Self { mu, beta }
    }
    /// CDF: F(x) = exp(-exp(-(x-mu)/beta)).
    pub fn cdf(&self, x: f64) -> f64 {
        let z = -(x - self.mu) / self.beta;
        (-z.exp()).exp()
    }
    /// Mean = mu + beta * euler_gamma.
    pub fn mean(&self) -> f64 {
        self.mu + self.beta * Self::EULER_GAMMA
    }
    /// Variance = pi^2 * beta^2 / 6.
    pub fn variance(&self) -> f64 {
        std::f64::consts::PI * std::f64::consts::PI * self.beta * self.beta / 6.0
    }
    /// Mode = mu.
    pub fn mode(&self) -> f64 {
        self.mu
    }
}
/// Sequential probability ratio test (SPRT) by Wald.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct SprtTest {
    pub h0_rate: f64,
    pub h1_rate: f64,
    pub alpha: f64,
    pub beta: f64,
    pub log_lr: f64,
}
#[allow(dead_code)]
impl SprtTest {
    /// Starts a test with the given hypothesis rates and error targets;
    /// the log likelihood ratio starts at zero.
    pub fn new(h0_rate: f64, h1_rate: f64, alpha: f64, beta: f64) -> Self {
        Self {
            h0_rate,
            h1_rate,
            alpha,
            beta,
            log_lr: 0.0,
        }
    }
    /// Update with new Bernoulli observation x in {0,1}.
    pub fn update_bernoulli(&mut self, x: u8) {
        let x = x as f64;
        let success_term = x * (self.h1_rate / self.h0_rate).ln();
        let failure_term = (1.0 - x) * ((1.0 - self.h1_rate) / (1.0 - self.h0_rate)).ln();
        self.log_lr += success_term + failure_term;
    }
    /// Decision: Some(true) = reject H0, Some(false) = accept H0, None = continue.
    pub fn decision(&self) -> Option<bool> {
        // Wald's thresholds A = (1-β)/α and B = β/(1-α), in log space.
        let upper = ((1.0 - self.beta) / self.alpha).ln();
        let lower = (self.beta / (1.0 - self.alpha)).ln();
        if self.log_lr >= upper {
            Some(true)
        } else if self.log_lr <= lower {
            Some(false)
        } else {
            None
        }
    }
}
/// Coupling of probability measures.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct Coupling {
    pub measure1: String,
    pub measure2: String,
    pub coupling_type: String,
    pub tv_bound: Option<f64>,
}
#[allow(dead_code)]
impl Coupling {
    /// Maximal coupling achieving TV distance.
    pub fn maximal(mu: &str, nu: &str, tv: f64) -> Self {
        Coupling {
            measure1: mu.to_string(),
            measure2: nu.to_string(),
            coupling_type: String::from("maximal"),
            tv_bound: Some(tv),
        }
    }
    /// Optimal transport coupling (Wasserstein); no TV bound recorded.
    pub fn optimal_transport(mu: &str, nu: &str) -> Self {
        Coupling {
            measure1: mu.to_string(),
            measure2: nu.to_string(),
            coupling_type: String::from("optimal transport"),
            tv_bound: None,
        }
    }
    /// P(X != Y) = TV(mu, nu) for maximal coupling; generic description otherwise.
    pub fn maximal_coupling_property(&self) -> String {
        match self.tv_bound {
            Some(tv) => format!(
                "P(X != Y) = {:.4} = TV({}, {})",
                tv, self.measure1, self.measure2
            ),
            None => format!(
                "{} coupling of {} and {}",
                self.coupling_type, self.measure1, self.measure2
            ),
        }
    }
}
/// Dirichlet process DP(α, G0) over a named base distribution.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct DirichletProcess {
    pub concentration: f64,
    pub base_distribution: String,
    pub is_discrete: bool,
    pub expected_clusters: f64,
}
#[allow(dead_code)]
impl DirichletProcess {
    /// Builds a DP with concentration `alpha` and base distribution `base`.
    /// DP draws are discrete with probability one.
    pub fn new(alpha: f64, base: &str) -> Self {
        DirichletProcess {
            concentration: alpha,
            base_distribution: base.to_string(),
            is_discrete: true,
            expected_clusters: 0.0,
        }
    }
    /// Approximate expected cluster count: α·ln(1 + n/α).
    pub fn expected_clusters_for_n(&self, n: usize) -> f64 {
        self.concentration * (1.0 + n as f64 / self.concentration).ln()
    }
    /// Sethuraman's stick-breaking construction of the DP weights.
    pub fn stick_breaking_construction(&self) -> String {
        format!(
            "Stick-breaking: V_k ~ Beta(1, {:.3}), w_k = V_k ∏_{{j<k}} (1-V_j)",
            self.concentration
        )
    }
    /// Chinese restaurant process view with the expected table count.
    pub fn chinese_restaurant_process(&self, n: usize) -> String {
        format!(
            "CRP (α={:.3}, n={}): E[#tables] ≈ {:.2}",
            self.concentration,
            n,
            self.expected_clusters_for_n(n)
        )
    }
    /// Posterior after `n_obs` observations: concentration grows to α + n.
    pub fn posterior_update(&self, n_obs: usize) -> Self {
        DirichletProcess {
            concentration: self.concentration + n_obs as f64,
            base_distribution: self.base_distribution.clone(),
            is_discrete: true,
            expected_clusters: self.expected_clusters_for_n(n_obs),
        }
    }
}
/// Hypergeometric distribution: drawing n items from population N with K successes.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct HypergeometricRV {
    pub n_pop: u64,
    pub k_suc: u64,
    pub n_draw: u64,
}
#[allow(dead_code)]
impl HypergeometricRV {
    /// Constructs the distribution; panics unless K ≤ N and n ≤ N.
    pub fn new(n_pop: u64, k_suc: u64, n_draw: u64) -> Self {
        assert!(k_suc <= n_pop, "K <= N required");
        assert!(n_draw <= n_pop, "n <= N required");
        Self {
            n_pop,
            k_suc,
            n_draw,
        }
    }
    /// Mean = n * K / N.
    pub fn mean(&self) -> f64 {
        self.n_draw as f64 * self.k_suc as f64 / self.n_pop as f64
    }
    /// Variance = n * K/N * (1 - K/N) * (N-n)/(N-1).
    ///
    /// For N ≤ 1 the draw is deterministic, so the variance is 0; the
    /// previous version divided by N-1 and produced NaN for N = 1.
    pub fn variance(&self) -> f64 {
        if self.n_pop <= 1 {
            return 0.0;
        }
        let n = self.n_draw as f64;
        let k = self.k_suc as f64;
        let nn = self.n_pop as f64;
        n * (k / nn) * (1.0 - k / nn) * (nn - n) / (nn - 1.0)
    }
}
/// Martingale difference sequence checker.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct MartingaleDifferenceTest {
    pub diffs: Vec<f64>,
}
#[allow(dead_code)]
impl MartingaleDifferenceTest {
    /// Builds the first-difference sequence of `series`.
    pub fn new(series: &[f64]) -> Self {
        let diffs: Vec<f64> = series.windows(2).map(|w| w[1] - w[0]).collect();
        Self { diffs }
    }
    /// Sample mean of differences (should be ~0 for MDS); 0 when empty.
    pub fn mean_diff(&self) -> f64 {
        if self.diffs.is_empty() {
            return 0.0;
        }
        self.diffs.iter().sum::<f64>() / self.diffs.len() as f64
    }
    /// Unbiased (n-1 denominator) sample variance of differences; 0 when
    /// fewer than two differences exist.
    pub fn var_diff(&self) -> f64 {
        if self.diffs.len() < 2 {
            return 0.0;
        }
        let m = self.mean_diff();
        self.diffs.iter().map(|&d| (d - m) * (d - m)).sum::<f64>() / (self.diffs.len() - 1) as f64
    }
    /// Test statistic: t = mean / se, approximately N(0,1) under the null.
    ///
    /// Returns 0.0 when fewer than two differences are available or the
    /// variance is zero. (The previous version computed sqrt(0/0) = NaN for
    /// empty or single-difference inputs.)
    pub fn t_statistic(&self) -> f64 {
        if self.diffs.len() < 2 {
            return 0.0;
        }
        let n = self.diffs.len() as f64;
        let se = (self.var_diff() / n).sqrt();
        if se == 0.0 {
            return 0.0;
        }
        self.mean_diff() / se
    }
}
/// Bayes factor for model comparison.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct BayesFactor {
    pub log_bf: f64,
}
#[allow(dead_code)]
impl BayesFactor {
    /// BF_10 in log space: log p(D|M1) - log p(D|M0).
    pub fn new(log_marginal_m1: f64, log_marginal_m0: f64) -> Self {
        Self {
            log_bf: log_marginal_m1 - log_marginal_m0,
        }
    }
    /// Evidence category per Jeffreys scale.
    ///
    /// Bands (on BF_10): <1 favors M0; 1-3 barely worth mentioning;
    /// 3-10 substantial; 10-30 strong; 30-100 very strong; >100 decisive.
    /// The previous version had a dead `x < ln(1)` arm (ln 1 = 0, already
    /// excluded by `x < 0`), which shifted every band one category too strong.
    pub fn jeffreys_scale(&self) -> &'static str {
        match self.log_bf {
            x if x < 0.0 => "Evidence for M0",
            x if x < 3.0_f64.ln() => "Barely worth mentioning",
            x if x < 10.0_f64.ln() => "Substantial",
            x if x < 30.0_f64.ln() => "Strong",
            x if x < 100.0_f64.ln() => "Very strong",
            _ => "Decisive",
        }
    }
    /// BF_10 (ratio of marginal likelihoods).
    pub fn bf10(&self) -> f64 {
        self.log_bf.exp()
    }
    /// BF_01 (inverse).
    pub fn bf01(&self) -> f64 {
        (-self.log_bf).exp()
    }
}
/// A discrete probability distribution backed by an explicit PMF table.
///
/// Sampling uses an LCG (linear congruential generator) seeded by the caller.
pub struct DiscreteDistribution {
    /// PMF values (must sum to 1).
    pub pmf: Vec<f64>,
}
impl DiscreteDistribution {
    /// Creates a `DiscreteDistribution` from raw weights, normalising them.
    /// A zero (or negative) total falls back to the uniform distribution.
    pub fn from_weights(weights: &[f64]) -> Self {
        let total: f64 = weights.iter().sum();
        if total > 0.0 {
            DiscreteDistribution {
                pmf: weights.iter().map(|w| w / total).collect(),
            }
        } else {
            let n = weights.len();
            DiscreteDistribution {
                pmf: vec![1.0 / n as f64; n],
            }
        }
    }
    /// Returns the PMF value at index `k` (0 outside the support).
    pub fn prob(&self, k: usize) -> f64 {
        self.pmf.get(k).copied().unwrap_or(0.0)
    }
    /// Draws a sample using an LCG random number in `[0, 1)`.
    ///
    /// Pass successive LCG outputs as `u` to simulate multiple draws.
    /// Inverse-CDF: returns the first index whose cumulative mass exceeds `u`.
    pub fn sample(&self, u: f64) -> usize {
        let mut acc = 0.0;
        for (idx, &p) in self.pmf.iter().enumerate() {
            acc += p;
            if u < acc {
                return idx;
            }
        }
        // Rounding (or u >= 1) can fall off the end: clamp to the last index.
        self.pmf.len().saturating_sub(1)
    }
    /// Computes the mean (E[X]).
    pub fn mean(&self) -> f64 {
        let mut mu = 0.0;
        for (i, &p) in self.pmf.iter().enumerate() {
            mu += i as f64 * p;
        }
        mu
    }
    /// Computes the variance (Var[X]).
    pub fn variance(&self) -> f64 {
        let mu = self.mean();
        let mut var = 0.0;
        for (i, &p) in self.pmf.iter().enumerate() {
            var += p * (i as f64 - mu).powi(2);
        }
        var
    }
    /// Computes the Shannon entropy H = -Σ p log p (nats), skipping zero cells.
    pub fn shannon_entropy(&self) -> f64 {
        let mut h = 0.0;
        for &p in &self.pmf {
            if p > 0.0 {
                h += -p * p.ln();
            }
        }
        h
    }
}
983/// Poisson process simulator and summary statistics.
984///
985/// A Poisson process N(t) with rate λ: inter-arrival times are Exp(λ).
986#[allow(dead_code)]
987pub struct PoissonProcess {
988    /// Arrival rate λ > 0.
989    pub lambda: f64,
990}
991#[allow(dead_code)]
992impl PoissonProcess {
993    /// Creates a `PoissonProcess` with rate λ.
994    pub fn new(lambda: f64) -> Self {
995        PoissonProcess { lambda }
996    }
997    /// Expected number of arrivals in [0, t]: E[N(t)] = λ t.
998    pub fn expected_count(&self, t: f64) -> f64 {
999        self.lambda * t
1000    }
1001    /// PMF of N(t): P(N(t) = k) = (λt)^k e^{-λt} / k!
1002    pub fn count_pmf(&self, t: f64, k: u32) -> f64 {
1003        poisson_pmf(self.lambda * t, k)
1004    }
1005    /// Variance of N(t): Var[N(t)] = λ t.
1006    pub fn variance_count(&self, t: f64) -> f64 {
1007        self.lambda * t
1008    }
1009    /// Generates inter-arrival times up to total time `t_max` using an LCG.
1010    /// Returns the vector of arrival times within [0, t_max].
1011    pub fn simulate_arrivals(&self, t_max: f64, lcg: &mut Lcg) -> Vec<f64> {
1012        let exp_dist = ExponentialDistribution::new(self.lambda);
1013        let mut arrivals = Vec::new();
1014        let mut current = 0.0;
1015        loop {
1016            let u = lcg.next_f64();
1017            let u = if u <= 0.0 { 1e-15 } else { u };
1018            let inter = exp_dist.sample(1.0 - u);
1019            current += inter;
1020            if current > t_max {
1021                break;
1022            }
1023            arrivals.push(current);
1024        }
1025        arrivals
1026    }
1027    /// Compound Poisson process: E[Σ Y_i] = λ t E[Y] for i.i.d. jumps Y.
1028    pub fn compound_expected(&self, t: f64, jump_mean: f64) -> f64 {
1029        self.lambda * t * jump_mean
1030    }
1031}
/// Beta distribution X ~ Beta(alpha, beta).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct BetaRV {
    pub alpha: f64,
    pub beta: f64,
}
#[allow(dead_code)]
impl BetaRV {
    /// Constructs the distribution; panics unless both parameters are positive.
    pub fn new(alpha: f64, beta: f64) -> Self {
        assert!(alpha > 0.0 && beta > 0.0, "alpha, beta must be positive");
        Self { alpha, beta }
    }
    /// Mean = alpha / (alpha + beta).
    pub fn mean(&self) -> f64 {
        let total = self.alpha + self.beta;
        self.alpha / total
    }
    /// Variance = alpha*beta / ((alpha+beta)^2 * (alpha+beta+1)).
    pub fn variance(&self) -> f64 {
        let total = self.alpha + self.beta;
        (self.alpha * self.beta) / (total * total * (total + 1.0))
    }
    /// Mode = (alpha-1)/(alpha+beta-2); only defined in the interior case
    /// alpha > 1 and beta > 1, otherwise `None`.
    pub fn mode(&self) -> Option<f64> {
        let interior = self.alpha > 1.0 && self.beta > 1.0;
        interior.then(|| (self.alpha - 1.0) / (self.alpha + self.beta - 2.0))
    }
}
/// Discrete-time, finite-state Markov chain with mixing time estimation.
pub struct MarkovChain {
    /// Number of states.
    pub states: usize,
    /// Row-stochastic transition matrix: `transition[i][j]` = P(i → j).
    pub transition: Vec<Vec<f64>>,
}
impl MarkovChain {
    /// Creates a new `MarkovChain` from a row-stochastic transition matrix.
    pub fn new(transition: Vec<Vec<f64>>) -> Self {
        let states = transition.len();
        MarkovChain { states, transition }
    }
    /// One step of distribution evolution: returns `dist · P`.
    fn step(&self, dist: &[f64]) -> Vec<f64> {
        let n = self.states;
        let mut next = vec![0.0f64; n];
        for j in 0..n {
            for i in 0..n {
                next[j] += dist[i] * self.transition[i][j];
            }
        }
        next
    }
    /// Computes the stationary distribution via power iteration.
    ///
    /// Returns a distribution π such that π P = π.
    pub fn stationary_distribution(&self) -> Vec<f64> {
        let n = self.states;
        if n == 0 {
            return vec![];
        }
        // Start from the uniform distribution and iterate π ← π P.
        let mut dist = vec![1.0 / n as f64; n];
        for _ in 0..1000 {
            let mut next = self.step(&dist);
            // Renormalise to guard against floating-point drift.
            let total: f64 = next.iter().sum();
            if total > 0.0 {
                for v in next.iter_mut() {
                    *v /= total;
                }
            }
            // L1 distance between successive iterates; stop once converged.
            let diff: f64 = dist
                .iter()
                .zip(next.iter())
                .map(|(a, b)| (a - b).abs())
                .sum();
            dist = next;
            if diff < 1e-10 {
                break;
            }
        }
        dist
    }
    /// Estimates the ε-mixing time: smallest t such that
    /// max_i d_TV(P^t(i, ·), π) ≤ ε.
    ///
    /// Tracks the chain started from EVERY state, as the max over initial
    /// states in the definition requires. (A previous version only tracked
    /// the chain started at state 0, underestimating the mixing time when
    /// another start mixes more slowly.) Capped at 10 000 steps.
    pub fn mixing_time(&self, epsilon: f64) -> usize {
        let n = self.states;
        if n == 0 {
            return 0;
        }
        let stationary = self.stationary_distribution();
        // rows[i] = current distribution of the chain started at state i.
        let mut rows: Vec<Vec<f64>> = (0..n)
            .map(|i| {
                let mut row = vec![0.0f64; n];
                row[i] = 1.0;
                row
            })
            .collect();
        for t in 1..=10_000 {
            let mut worst = 0.0f64;
            for row in rows.iter_mut() {
                *row = self.step(row);
                let tv: f64 = 0.5
                    * row
                        .iter()
                        .zip(stationary.iter())
                        .map(|(a, b)| (a - b).abs())
                        .sum::<f64>();
                if tv > worst {
                    worst = tv;
                }
            }
            if worst <= epsilon {
                return t;
            }
        }
        10_000
    }
    /// Checks whether the chain is ergodic (all states communicate).
    pub fn is_ergodic(&self) -> bool {
        let n = self.states;
        if n == 0 {
            return true;
        }
        // State 0 must reach every state, and every state must reach state 0;
        // together these imply all states communicate.
        let forward = self.reachable_from(0);
        if forward.iter().any(|&r| !r) {
            return false;
        }
        (0..n).all(|start| self.reachable_from(start)[0])
    }
    /// BFS reachability: which states are reachable from `start`?
    fn reachable_from(&self, start: usize) -> Vec<bool> {
        let n = self.states;
        let mut visited = vec![false; n];
        let mut queue = std::collections::VecDeque::new();
        visited[start] = true;
        queue.push_back(start);
        while let Some(cur) = queue.pop_front() {
            for next in 0..n {
                if !visited[next] && self.transition[cur][next] > 0.0 {
                    visited[next] = true;
                    queue.push_back(next);
                }
            }
        }
        visited
    }
}
/// Running estimate of mean and variance using Welford's online algorithm.
///
/// Processes one sample at a time in O(1) time and O(1) space.
#[allow(dead_code)]
pub struct WelfordEstimator {
    /// Number of samples incorporated so far.
    count: u64,
    /// Running sample mean.
    mean: f64,
    /// Sum of squared deviations from the running mean (Welford's M2).
    m2: f64,
}
#[allow(dead_code)]
impl Default for WelfordEstimator {
    /// Same as [`WelfordEstimator::new`]: an empty estimator.
    fn default() -> Self {
        Self::new()
    }
}
#[allow(dead_code)]
impl WelfordEstimator {
    /// Creates a new empty `WelfordEstimator`.
    pub fn new() -> Self {
        WelfordEstimator {
            count: 0,
            mean: 0.0,
            m2: 0.0,
        }
    }
    /// Incorporates a new observation `x`.
    pub fn update(&mut self, x: f64) {
        self.count += 1;
        let delta = x - self.mean;
        self.mean += delta / self.count as f64;
        // Second deviation uses the UPDATED mean; the product delta * delta2
        // is what makes the M2 accumulation numerically stable.
        let delta2 = x - self.mean;
        self.m2 += delta * delta2;
    }
    /// Returns the current sample count.
    pub fn count(&self) -> u64 {
        self.count
    }
    /// Returns the current mean estimate (0.0 before any sample).
    pub fn mean(&self) -> f64 {
        self.mean
    }
    /// Returns the current unbiased variance estimate (n-1 denominator).
    /// Returns 0.0 when fewer than two samples have been seen.
    pub fn variance(&self) -> f64 {
        if self.count < 2 {
            return 0.0;
        }
        self.m2 / (self.count - 1) as f64
    }
    /// Returns the current standard deviation estimate.
    pub fn std_dev(&self) -> f64 {
        self.variance().sqrt()
    }
    /// Merges another `WelfordEstimator` into this one using the parallel
    /// (Chan et al.) combination rule; `other` is left untouched.
    pub fn merge(&mut self, other: &WelfordEstimator) {
        let combined = self.count + other.count;
        if combined == 0 {
            return;
        }
        let delta = other.mean - self.mean;
        // Combined M2 adds a correction term for the gap between the means.
        self.m2 = self.m2
            + other.m2
            + delta * delta * self.count as f64 * other.count as f64 / combined as f64;
        self.mean =
            (self.mean * self.count as f64 + other.mean * other.count as f64) / combined as f64;
        self.count = combined;
    }
}
1237#[allow(dead_code)]
1238#[derive(Debug, Clone)]
1239pub struct GaussianProcessRegression {
1240    pub gp: GaussianProcess,
1241    pub noise_variance: f64,
1242    pub n_training: usize,
1243    pub prediction_method: String,
1244}
1245#[allow(dead_code)]
1246impl GaussianProcessRegression {
1247    pub fn new(gp: GaussianProcess, noise: f64) -> Self {
1248        GaussianProcessRegression {
1249            gp,
1250            noise_variance: noise,
1251            n_training: 0,
1252            prediction_method: "exact".to_string(),
1253        }
1254    }
1255    pub fn complexity_exact(&self) -> String {
1256        format!(
1257            "Exact GPR: O(n³) training, O(n²) per prediction (n={})",
1258            self.n_training
1259        )
1260    }
1261    pub fn sparse_gp_complexity(&self, m: usize) -> String {
1262        format!(
1263            "Sparse GPR: O(nm²) training, O(m²) per prediction (m={} inducing points)",
1264            m
1265        )
1266    }
1267    pub fn log_marginal_likelihood(&self) -> String {
1268        "log p(y|X) = -½ y^T(K+σ²I)^{-1}y - ½ log|K+σ²I| - n/2 log(2π)".to_string()
1269    }
1270}
1271/// Normal (Gaussian) distribution with mean μ and standard deviation σ.
1272pub struct GaussianDistribution {
1273    /// Mean.
1274    pub mu: f64,
1275    /// Standard deviation (must be > 0).
1276    pub sigma: f64,
1277}
1278impl GaussianDistribution {
1279    /// Creates a `GaussianDistribution`.
1280    pub fn new(mu: f64, sigma: f64) -> Self {
1281        GaussianDistribution { mu, sigma }
1282    }
1283    /// Probability density function f(x; μ, σ).
1284    pub fn pdf(&self, x: f64) -> f64 {
1285        normal_pdf(x, self.mu, self.sigma)
1286    }
1287    /// Approximates the CDF Φ((x-μ)/σ) using the Abramowitz & Stegun
1288    /// rational approximation (maximum error 7.5 × 10⁻⁸).
1289    pub fn cdf(&self, x: f64) -> f64 {
1290        let z = (x - self.mu) / self.sigma;
1291        standard_normal_cdf(z)
1292    }
1293    /// Draws a sample using the Box–Muller transform given two uniform inputs.
1294    pub fn sample_box_muller(&self, u1: f64, u2: f64) -> f64 {
1295        let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
1296        self.mu + self.sigma * z
1297    }
1298    /// Returns the moment generating function M_X(t) = exp(μt + σ²t²/2).
1299    pub fn mgf(&self, t: f64) -> f64 {
1300        (self.mu * t + 0.5 * self.sigma * self.sigma * t * t).exp()
1301    }
1302    /// Returns the k-th central moment of the standard normal distribution (μ=0, σ=1).
1303    /// Even moments: (k-1)!! = 1·3·5·…·(k-1).  Odd moments: 0.
1304    pub fn standard_moment(k: u32) -> f64 {
1305        if k % 2 == 1 {
1306            return 0.0;
1307        }
1308        let mut result = 1.0f64;
1309        let mut i = 1u32;
1310        while i < k {
1311            result *= i as f64;
1312            i += 2;
1313        }
1314        result
1315    }
1316}
/// Empirical CDF (ECDF) from a finite sample.
///
/// F̂_n(x) = (1/n) |{i : X_i ≤ x}|.
#[allow(dead_code)]
pub struct EmpiricalCdf {
    /// Sorted sample values.
    sorted: Vec<f64>,
}
#[allow(dead_code)]
impl EmpiricalCdf {
    /// Creates an `EmpiricalCdf` from raw (unsorted) data.
    pub fn new(mut data: Vec<f64>) -> Self {
        // NaN-tolerant sort: incomparable pairs are treated as equal.
        data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        EmpiricalCdf { sorted: data }
    }
    /// Evaluates F̂_n(x) = P̂(X ≤ x).
    pub fn eval(&self, x: f64) -> f64 {
        match self.sorted.len() {
            0 => 0.0,
            n => self.sorted.partition_point(|&v| v <= x) as f64 / n as f64,
        }
    }
    /// Returns the sample size.
    pub fn len(&self) -> usize {
        self.sorted.len()
    }
    /// Returns true if the sample is empty.
    pub fn is_empty(&self) -> bool {
        self.sorted.is_empty()
    }
    /// Computes the Kolmogorov–Smirnov statistic D_n = sup_x |F̂_n(x) - F(x)|
    /// against a reference CDF given as a closure.
    pub fn ks_statistic(&self, reference_cdf: impl Fn(f64) -> f64) -> f64 {
        let n = self.sorted.len() as f64;
        self.sorted
            .iter()
            .enumerate()
            .map(|(i, &x)| {
                let f_ref = reference_cdf(x);
                // The ECDF jumps at each sample point; compare against the
                // step value just below and just above x.
                let below = (i as f64 / n - f_ref).abs();
                let above = ((i + 1) as f64 / n - f_ref).abs();
                below.max(above)
            })
            .fold(0.0, f64::max)
    }
    /// Returns the empirical quantile at level p ∈ [0,1].
    pub fn quantile(&self, p: f64) -> f64 {
        if self.sorted.is_empty() {
            return f64::NAN;
        }
        let n = self.sorted.len();
        let level = p.clamp(0.0, 1.0);
        let idx = ((level * n as f64).ceil() as usize)
            .saturating_sub(1)
            .min(n - 1);
        self.sorted[idx]
    }
}
/// Kalman filter state estimator.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct KalmanFilter {
    /// State estimate x_hat.
    pub x_hat: Vec<f64>,
    /// Error covariance P.
    pub p: Vec<Vec<f64>>,
    /// State transition matrix F.
    pub f: Vec<Vec<f64>>,
    /// Observation matrix H.
    pub h: Vec<Vec<f64>>,
    /// Process noise covariance Q.
    pub q: Vec<Vec<f64>>,
    /// Measurement noise covariance R.
    pub r: Vec<Vec<f64>>,
}
#[allow(dead_code)]
impl KalmanFilter {
    /// Builds a scalar (1-D) filter from its six scalar parameters.
    pub fn new_1d(f: f64, h: f64, q: f64, r_val: f64, x0: f64, p0: f64) -> Self {
        Self {
            x_hat: vec![x0],
            p: vec![vec![p0]],
            f: vec![vec![f]],
            h: vec![vec![h]],
            q: vec![vec![q]],
            r: vec![vec![r_val]],
        }
    }
    /// Predict step (1-D specialization): x ← F x, P ← F P F + Q.
    pub fn predict_1d(&mut self) {
        let f = self.f[0][0];
        self.x_hat[0] *= f;
        self.p[0][0] = f * self.p[0][0] * f + self.q[0][0];
    }
    /// Update step (1-D specialization) with measurement `z`.
    pub fn update_1d(&mut self, z: f64) {
        let obs = self.h[0][0];
        // Innovation covariance S and Kalman gain K.
        let innovation_cov = obs * self.p[0][0] * obs + self.r[0][0];
        let gain = self.p[0][0] * obs / innovation_cov;
        // Measurement residual y = z - H x.
        let residual = z - obs * self.x_hat[0];
        self.x_hat[0] += gain * residual;
        self.p[0][0] *= 1.0 - gain * obs;
    }
    /// Filtered state estimate.
    pub fn estimate(&self) -> f64 {
        self.x_hat[0]
    }
    /// Current error variance.
    pub fn error_variance(&self) -> f64 {
        self.p[0][0]
    }
}
1433#[allow(dead_code)]
1434#[derive(Debug, Clone)]
1435pub struct Copula {
1436    pub kind: CopulaKind,
1437}
1438#[allow(dead_code)]
1439impl Copula {
1440    pub fn gaussian(rho: f64) -> Self {
1441        assert!(rho.abs() < 1.0, "rho must be in (-1,1)");
1442        Self {
1443            kind: CopulaKind::Gaussian { rho },
1444        }
1445    }
1446    pub fn clayton(theta: f64) -> Self {
1447        assert!(theta > 0.0, "Clayton theta must be positive");
1448        Self {
1449            kind: CopulaKind::Clayton { theta },
1450        }
1451    }
1452    pub fn gumbel(theta: f64) -> Self {
1453        assert!(theta >= 1.0, "Gumbel theta must be >= 1");
1454        Self {
1455            kind: CopulaKind::Gumbel { theta },
1456        }
1457    }
1458    pub fn frank(theta: f64) -> Self {
1459        Self {
1460            kind: CopulaKind::Frank { theta },
1461        }
1462    }
1463    pub fn independence() -> Self {
1464        Self {
1465            kind: CopulaKind::Independence,
1466        }
1467    }
1468    /// Evaluate Clayton copula C(u,v) = max(u^(-theta)+v^(-theta)-1, 0)^(-1/theta).
1469    pub fn evaluate_clayton(&self, u: f64, v: f64) -> f64 {
1470        if let CopulaKind::Clayton { theta } = self.kind {
1471            let val = u.powf(-theta) + v.powf(-theta) - 1.0;
1472            if val <= 0.0 {
1473                return 0.0;
1474            }
1475            val.powf(-1.0 / theta)
1476        } else {
1477            u * v
1478        }
1479    }
1480    /// Evaluate Gumbel copula C(u,v) = exp(-[(-ln u)^theta+(-ln v)^theta]^(1/theta)).
1481    pub fn evaluate_gumbel(&self, u: f64, v: f64) -> f64 {
1482        if let CopulaKind::Gumbel { theta } = self.kind {
1483            let a = (-u.ln()).powf(theta);
1484            let b = (-v.ln()).powf(theta);
1485            (-(a + b).powf(1.0 / theta)).exp()
1486        } else {
1487            u * v
1488        }
1489    }
1490    /// Spearman's rho for Clayton copula: 3*theta/(theta+2) (approximation).
1491    pub fn spearman_rho_clayton_approx(&self) -> Option<f64> {
1492        if let CopulaKind::Clayton { theta } = self.kind {
1493            Some(3.0 * theta / (theta + 2.0))
1494        } else {
1495            None
1496        }
1497    }
1498}
/// Ergodic theorem variant.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct ErgodicTheoremData {
    pub theorem_name: String,
    pub convergence_type: String,
    pub limit: String,
}
#[allow(dead_code)]
impl ErgodicTheoremData {
    /// Birkhoff's ergodic theorem (pointwise a.e.).
    pub fn birkhoff(measure_preserving: &str) -> Self {
        let limit = format!(
            "E[f | Invariant sigma-algebra]({} system)",
            measure_preserving
        );
        Self {
            theorem_name: String::from("Birkhoff"),
            convergence_type: String::from("a.e. and L1"),
            limit,
        }
    }
    /// von Neumann mean ergodic theorem (L2 convergence).
    pub fn von_neumann() -> Self {
        Self {
            theorem_name: String::from("von Neumann Mean Ergodic"),
            convergence_type: String::from("L2"),
            limit: String::from("projection onto invariant subspace"),
        }
    }
}
/// Characteristic function (Fourier transform of measure).
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct CharFunctionData {
    pub distribution: String,
    pub formula: String,
    pub is_integrable: bool,
}
#[allow(dead_code)]
impl CharFunctionData {
    /// Characteristic function of a Gaussian.
    pub fn gaussian(mean: f64, variance: f64) -> Self {
        let formula = format!("exp(i*{:.2}*t - {:.2}*t^2/2)", mean, variance);
        Self {
            distribution: String::from("Normal"),
            formula,
            is_integrable: true,
        }
    }
    /// Characteristic function of Poisson.
    pub fn poisson(lambda: f64) -> Self {
        let formula = format!("exp({:.2}*(e^{{it}} - 1))", lambda);
        Self {
            distribution: String::from("Poisson"),
            formula,
            is_integrable: false,
        }
    }
    /// Lévy-Cramér continuity theorem: convergence in distribution iff pointwise convergence of char. functions.
    pub fn levy_cramer_applies(&self) -> bool {
        true
    }
}