numrs2/random/
distributions_enhanced.rs

1//! Enhanced random distributions
2//!
3//! This module expands the random functionality by providing additional
4//! distributions and utility functions for more specialized statistical applications.
5
6use crate::array::Array;
7use crate::error::Result;
8use crate::random::state::RandomState;
9use num_traits::{Float, NumCast};
10use scirs2_core::random::prelude::Rng;
11use std::fmt::{Debug, Display};
12
13/// Get a reference to the global random state
14fn get_global_random_state() -> Result<std::sync::MutexGuard<'static, RandomState>> {
15    crate::random::distributions::get_global_random_state()
16}
17
18/// Generate random values from a truncated normal distribution
19///
20/// # Arguments
21///
22/// * `mean` - Mean of the normal distribution (before truncation)
23/// * `std` - Standard deviation of the normal distribution (before truncation)
24/// * `low` - Lower bound for truncation
25/// * `high` - Upper bound for truncation
26/// * `shape` - Shape of the output array
27///
28/// # Returns
29///
30/// An array of random values from the truncated normal distribution
31pub fn truncated_normal<T>(mean: T, std: T, low: T, high: T, shape: &[usize]) -> Result<Array<T>>
32where
33    T: Float
34        + NumCast
35        + Clone
36        + Debug
37        + Display
38        + scirs2_core::ndarray::distributions::uniform::SampleUniform,
39{
40    let rng = get_global_random_state()?;
41    rng.truncated_normal(mean, std, low, high, shape)
42}
43
44/// Generate random values from a von Mises distribution
45///
46/// # Arguments
47///
48/// * `mu` - Location parameter (mean direction)
49/// * `kappa` - Concentration parameter
50/// * `shape` - Shape of the output array
51///
52/// # Returns
53///
54/// An array of random values from the von Mises distribution
55pub fn vonmises<T>(mu: T, kappa: T, shape: &[usize]) -> Result<Array<T>>
56where
57    T: Float + NumCast + Clone + Debug + Display,
58{
59    let rng = get_global_random_state()?;
60    rng.vonmises(mu, kappa, shape)
61}
62
63/// Generate random values from a non-central chi-square distribution
64///
65/// # Arguments
66///
67/// * `df` - Degrees of freedom
68/// * `nonc` - Non-centrality parameter
69/// * `shape` - Shape of the output array
70///
71/// # Returns
72///
73/// An array of random values from the non-central chi-square distribution
74pub fn noncentral_chisquare<T>(df: T, nonc: T, shape: &[usize]) -> Result<Array<T>>
75where
76    T: Float + NumCast + Clone + Debug + Display,
77{
78    let rng = get_global_random_state()?;
79    rng.noncentral_chisquare(df, nonc, shape)
80}
81
82/// Generate random values from a non-central F distribution
83///
84/// # Arguments
85///
86/// * `dfnum` - Numerator degrees of freedom
87/// * `dfden` - Denominator degrees of freedom
88/// * `nonc` - Non-centrality parameter
89/// * `shape` - Shape of the output array
90///
91/// # Returns
92///
93/// An array of random values from the non-central F distribution
94pub fn noncentral_f<T>(dfnum: T, dfden: T, nonc: T, shape: &[usize]) -> Result<Array<T>>
95where
96    T: Float + NumCast + Clone + Debug + Display,
97{
98    let rng = get_global_random_state()?;
99    rng.noncentral_f(dfnum, dfden, nonc, shape)
100}
101
102/// Generate random values from a Maxwell distribution
103///
104/// # Arguments
105///
106/// * `scale` - Scale parameter
107/// * `shape` - Shape of the output array
108///
109/// # Returns
110///
111/// An array of random values from the Maxwell distribution
112pub fn maxwell<T>(scale: T, shape: &[usize]) -> Result<Array<T>>
113where
114    T: Float + NumCast + Clone + Debug + Display,
115{
116    let rng = get_global_random_state()?;
117    rng.maxwell(scale, shape)
118}
119
120/// Generate random values from a power distribution
121///
122/// # Arguments
123///
124/// * `a` - Power parameter
125/// * `shape` - Shape of the output array
126///
127/// # Returns
128///
129/// An array of random values from the power distribution
130pub fn power<T>(a: T, shape: &[usize]) -> Result<Array<T>>
131where
132    T: Float + NumCast + Clone + Debug + Display,
133{
134    let rng = get_global_random_state()?;
135    rng.power(a, shape)
136}
137
138/// Generate correlated random variables using the Cholesky decomposition
139///
140/// # Arguments
141///
142/// * `means` - Vector of means for each variable
143/// * `cov` - Covariance matrix
144/// * `size` - Number of samples to generate
145///
146/// # Returns
147///
148/// An array of correlated random samples with shape [size, n]
149pub fn multivariate_normal_cholesky<T>(means: &[T], cov: &Array<T>, size: usize) -> Result<Array<T>>
150where
151    T: Float + NumCast + Clone + Debug + Display,
152{
153    let rng = get_global_random_state()?;
154    rng.multivariate_normal_cholesky(means, cov, size)
155}
156
157/// Generate a random correlation matrix
158///
159/// # Arguments
160///
161/// * `n` - Dimension of the correlation matrix
162///
163/// # Returns
164///
165/// A random correlation matrix of shape [n, n]
166pub fn random_correlation_matrix<T>(n: usize) -> Result<Array<T>>
167where
168    T: Float
169        + NumCast
170        + Clone
171        + Debug
172        + Display
173        + scirs2_core::ndarray::distributions::uniform::SampleUniform,
174{
175    let rng = get_global_random_state()?;
176    rng.random_correlation_matrix(n)
177}
178
179/// Generate random samples from a mixture of distributions
180///
181/// # Arguments
182///
183/// * `weights` - Weights for each component in the mixture
184/// * `means` - Mean for each component
185/// * `stds` - Standard deviation for each component
186/// * `shape` - Shape of the output array
187///
188/// # Returns
189///
190/// An array of random values from the mixture distribution
191pub fn mixture_of_normals<T>(
192    weights: &[T],
193    means: &[T],
194    stds: &[T],
195    shape: &[usize],
196) -> Result<Array<T>>
197where
198    T: Float + NumCast + Clone + Debug + Display,
199{
200    let rng = get_global_random_state()?;
201    rng.mixture_of_normals(weights, means, stds, shape)
202}
203
204/// Generate Sobol sequence for quasi-Monte Carlo methods
205///
206/// # Arguments
207///
208/// * `dim` - Dimensionality of the sequence
209/// * `n` - Number of points to generate
210///
211/// # Returns
212///
213/// An array of shape [n, dim] with Sobol sequence points
214pub fn sobol_sequence<T>(dim: usize, n: usize) -> Result<Array<T>>
215where
216    T: Float + NumCast + Clone + Debug + Display,
217{
218    let rng = get_global_random_state()?;
219    rng.sobol_sequence(dim, n)
220}
221
222/// Generate Latin Hypercube samples
223///
224/// # Arguments
225///
226/// * `dim` - Number of dimensions
227/// * `n` - Number of samples
228///
229/// # Returns
230///
231/// An array of shape [n, dim] with Latin Hypercube samples
232pub fn latin_hypercube<T>(dim: usize, n: usize) -> Result<Array<T>>
233where
234    T: Float + NumCast + Clone + Debug + Display,
235{
236    let rng = get_global_random_state()?;
237    rng.latin_hypercube(dim, n)
238}
239
240/// Generate copula samples with a specified correlation structure
241///
242/// # Arguments
243///
244/// * `corr` - Correlation matrix
245/// * `n` - Number of samples
246/// * `copula_type` - Type of copula ("gaussian" or "t" supported)
247///
248/// # Returns
249///
250/// An array of shape [n, dim] with correlated uniform samples
251pub fn copula<T>(corr: &Array<T>, n: usize, copula_type: &str) -> Result<Array<T>>
252where
253    T: Float + NumCast + Clone + Debug + Display,
254{
255    let rng = get_global_random_state()?;
256    rng.copula(corr, n, copula_type)
257}
258
259/// Extend RandomState with enhanced distribution methods
260impl RandomState {
261    /// Generate random values from a truncated normal distribution
262    pub fn truncated_normal<T>(
263        &self,
264        mean: T,
265        std: T,
266        low: T,
267        high: T,
268        shape: &[usize],
269    ) -> Result<Array<T>>
270    where
271        T: Float + NumCast + Clone + Debug + Display,
272    {
273        if low >= high {
274            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
275                "Lower bound must be less than upper bound, got low={}, high={}",
276                low, high
277            )));
278        }
279
280        if std <= T::zero() {
281            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
282                "Standard deviation must be positive, got {}",
283                std
284            )));
285        }
286
287        let size: usize = shape.iter().product();
288        let mut vec = Vec::with_capacity(size);
289        let mut rng = self.get_rng()?;
290
291        // Convert to f64 for calculation
292        let mean_f64 = mean.to_f64().unwrap_or(0.0);
293        let std_f64 = std.to_f64().unwrap_or(1.0);
294        let low_f64 = low.to_f64().unwrap_or(f64::NEG_INFINITY);
295        let high_f64 = high.to_f64().unwrap_or(f64::INFINITY);
296
297        // Use rejection sampling for truncated normal (more robust than inverse CDF)
298        for _ in 0..size {
299            let mut sample;
300            let mut attempts = 0;
301            loop {
302                // Generate standard normal sample
303                let u1 = rng.random::<f64>();
304                let u2 = rng.random::<f64>();
305                let z = (-2.0 * u1.ln()).sqrt() * (2.0 * std::f64::consts::PI * u2).cos();
306
307                // Transform to desired distribution
308                sample = mean_f64 + std_f64 * z;
309
310                // Check if within bounds
311                if sample >= low_f64 && sample <= high_f64 {
312                    break;
313                }
314
315                attempts += 1;
316                // Prevent infinite loops for very narrow truncation
317                if attempts > 1000 {
318                    sample = (low_f64 + high_f64) / 2.0;
319                    break;
320                }
321            }
322
323            let val = <T as NumCast>::from(sample).unwrap_or_else(|| {
324                if sample <= low.to_f64().unwrap_or(f64::NEG_INFINITY) {
325                    low
326                } else {
327                    high
328                }
329            });
330
331            vec.push(val);
332        }
333
334        Ok(Array::from_vec(vec).reshape(shape))
335    }
336
337    /// Generate random values from a von Mises distribution
338    pub fn vonmises<T>(&self, mu: T, kappa: T, shape: &[usize]) -> Result<Array<T>>
339    where
340        T: Float + NumCast + Clone + Debug + Display,
341    {
342        if kappa < T::zero() {
343            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
344                "Concentration parameter must be non-negative, got {}",
345                kappa
346            )));
347        }
348
349        let size: usize = shape.iter().product();
350        let mut vec = Vec::with_capacity(size);
351        let mut rng = self.get_rng()?;
352
353        let kappa_f64 = kappa.to_f64().unwrap_or(0.0);
354        let mu_f64 = mu.to_f64().unwrap_or(0.0);
355
356        // Generate von Mises samples using the algorithm from Devroye (1986)
357        for _ in 0..size {
358            let sample = if kappa_f64 < 1e-6 {
359                // For very small kappa, use uniform distribution
360                rng.random::<f64>() * 2.0 * std::f64::consts::PI - std::f64::consts::PI
361            } else {
362                // Use Best-Fisher algorithm for von Mises sampling
363                let a = 1.0 + (1.0 + 4.0 * kappa_f64 * kappa_f64).sqrt();
364                let b = (a - (2.0 * a).sqrt()) / (2.0 * kappa_f64);
365                let r = (1.0 + b * b) / (2.0 * b);
366
367                let mut attempts = 0;
368                loop {
369                    attempts += 1;
370                    if attempts > 1000 {
371                        // Fallback to uniform if too many attempts
372                        break rng.random::<f64>() * 2.0 * std::f64::consts::PI
373                            - std::f64::consts::PI;
374                    }
375
376                    let u1 = rng.random::<f64>();
377                    let z = (1.0 - u1).cos();
378                    let f = (1.0 + r * z) / (r + z);
379                    let c = kappa_f64 * (r - f);
380
381                    let u2 = rng.random::<f64>();
382
383                    if c * (2.0 - c) - u2 > 0.0 {
384                        let u3 = rng.random::<f64>();
385                        let theta = if u3 - 0.5 > 0.0 { f.acos() } else { -f.acos() };
386                        break theta;
387                    }
388
389                    if (c / u2.max(1e-10)).ln() + 1.0 - c >= 0.0 {
390                        let u3 = rng.random::<f64>();
391                        let theta = if u3 - 0.5 > 0.0 { f.acos() } else { -f.acos() };
392                        break theta;
393                    }
394                }
395            };
396
397            let angle = mu_f64 + sample;
398            let normalized = ((angle + std::f64::consts::PI) % (2.0 * std::f64::consts::PI))
399                - std::f64::consts::PI;
400
401            vec.push(<T as NumCast>::from(normalized).unwrap_or(T::zero()));
402        }
403
404        Ok(Array::from_vec(vec).reshape(shape))
405    }
406
407    /// Generate random values from a non-central chi-square distribution
408    pub fn noncentral_chisquare<T>(&self, df: T, nonc: T, shape: &[usize]) -> Result<Array<T>>
409    where
410        T: Float + NumCast + Clone + Debug + Display,
411    {
412        if df <= T::zero() {
413            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
414                "Degrees of freedom must be positive, got {}",
415                df
416            )));
417        }
418
419        if nonc < T::zero() {
420            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
421                "Non-centrality parameter must be non-negative, got {}",
422                nonc
423            )));
424        }
425
426        let size: usize = shape.iter().product();
427        let mut vec = Vec::with_capacity(size);
428
429        let df_f64 = df.to_f64().unwrap_or(0.0);
430        let nonc_f64 = nonc.to_f64().unwrap_or(0.0);
431
432        // Algorithm: non-central chi-square can be generated as a Poisson mixture of central chi-squares
433        for _ in 0..size {
434            // Generate Poisson random variable with mean nonc/2
435            let pois: Array<u64> = self.poisson(nonc_f64 / 2.0, &[1])?;
436            let n = <usize as NumCast>::from(pois.to_vec()[0]).unwrap_or(0);
437
438            // Generate chi-square with df + 2*n degrees of freedom
439            let chi2 = self.chisquare(
440                <T as NumCast>::from(df_f64 + 2.0 * n as f64).unwrap_or(T::zero()),
441                &[1],
442            )?;
443
444            vec.push(chi2.to_vec()[0]);
445        }
446
447        Ok(Array::from_vec(vec).reshape(shape))
448    }
449
450    /// Generate random values from a non-central F distribution
451    pub fn noncentral_f<T>(&self, dfnum: T, dfden: T, nonc: T, shape: &[usize]) -> Result<Array<T>>
452    where
453        T: Float + NumCast + Clone + Debug + Display,
454    {
455        if dfnum <= T::zero() || dfden <= T::zero() {
456            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
457                "Degrees of freedom must be positive, got dfnum={}, dfden={}",
458                dfnum, dfden
459            )));
460        }
461
462        if nonc < T::zero() {
463            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
464                "Non-centrality parameter must be non-negative, got {}",
465                nonc
466            )));
467        }
468
469        let size: usize = shape.iter().product();
470        let mut vec = Vec::with_capacity(size);
471
472        let dfnum_f64 = dfnum.to_f64().unwrap_or(0.0);
473        let dfden_f64 = dfden.to_f64().unwrap_or(0.0);
474        let nonc_f64 = nonc.to_f64().unwrap_or(0.0);
475
476        // Non-central F is the ratio of a non-central chi-square and a central chi-square,
477        // each divided by their degrees of freedom
478        for _ in 0..size {
479            // Generate non-central chi-square with dfnum degrees of freedom
480            let nc_chi2 = self.noncentral_chisquare(
481                <T as NumCast>::from(dfnum_f64).unwrap_or(T::zero()),
482                <T as NumCast>::from(nonc_f64).unwrap_or(T::zero()),
483                &[1],
484            )?;
485
486            // Generate central chi-square with dfden degrees of freedom
487            let chi2 =
488                self.chisquare(<T as NumCast>::from(dfden_f64).unwrap_or(T::zero()), &[1])?;
489
490            // Compute the ratio (non-central F)
491            let f_val = (nc_chi2.to_vec()[0] / dfnum) / (chi2.to_vec()[0] / dfden);
492            vec.push(f_val);
493        }
494
495        Ok(Array::from_vec(vec).reshape(shape))
496    }
497
498    /// Generate random values from a Maxwell distribution
499    pub fn maxwell<T>(&self, scale: T, shape: &[usize]) -> Result<Array<T>>
500    where
501        T: Float + NumCast + Clone + Debug + Display,
502    {
503        if scale <= T::zero() {
504            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
505                "Scale parameter must be positive, got {}",
506                scale
507            )));
508        }
509
510        let size: usize = shape.iter().product();
511        let mut vec = Vec::with_capacity(size);
512
513        // Maxwell distribution is the distribution of the magnitude of a
514        // 3D vector with independent normal components
515        for _ in 0..size {
516            // Generate 3 independent normal random variables
517            let x = self.normal(T::zero(), scale, &[3])?;
518
519            // Compute the magnitude
520            let magnitude =
521                (x.to_vec()[0].powi(2) + x.to_vec()[1].powi(2) + x.to_vec()[2].powi(2)).sqrt();
522
523            vec.push(magnitude);
524        }
525
526        Ok(Array::from_vec(vec).reshape(shape))
527    }
528
529    /// Generate random values from a power distribution
530    pub fn power<T>(&self, a: T, shape: &[usize]) -> Result<Array<T>>
531    where
532        T: Float + NumCast + Clone + Debug + Display,
533    {
534        if a <= T::zero() {
535            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
536                "Power parameter must be positive, got {}",
537                a
538            )));
539        }
540
541        let size: usize = shape.iter().product();
542        let mut vec = Vec::with_capacity(size);
543        let mut rng = self.get_rng()?;
544
545        // Power distribution with parameter a has PDF: a*x^(a-1) on [0, 1]
546        // and can be generated by transforming uniform random variables
547        for _ in 0..size {
548            let u = rng.random::<f64>();
549            let val = u.powf(1.0 / a.to_f64().unwrap_or(1.0));
550            vec.push(<T as NumCast>::from(val).unwrap_or(T::zero()));
551        }
552
553        Ok(Array::from_vec(vec).reshape(shape))
554    }
555
556    /// Generate correlated random variables using the Cholesky decomposition
557    pub fn multivariate_normal_cholesky<T>(
558        &self,
559        means: &[T],
560        cov: &Array<T>,
561        size: usize,
562    ) -> Result<Array<T>>
563    where
564        T: Float + NumCast + Clone + Debug + Display,
565    {
566        let n = means.len();
567        let cov_shape = cov.shape();
568
569        // Validate inputs
570        if cov_shape.len() != 2 || cov_shape[0] != n || cov_shape[1] != n {
571            return Err(crate::error::NumRs2Error::InvalidOperation(
572                format!("Covariance matrix must be square with dimensions matching mean vector length ({}), got shape {:?}", n, cov_shape)
573            ));
574        }
575
576        // Perform Cholesky decomposition
577        let cov_data = cov.to_vec();
578        let mut chol = vec![T::zero(); n * n];
579
580        for i in 0..n {
581            for j in 0..=i {
582                let mut s = T::zero();
583                for k in 0..j {
584                    s = s + chol[i * n + k] * chol[j * n + k];
585                }
586
587                if i == j {
588                    let val = cov_data[i * n + i] - s;
589                    if val <= T::zero() {
590                        return Err(crate::error::NumRs2Error::InvalidOperation(
591                            "Covariance matrix is not positive definite".to_string(),
592                        ));
593                    }
594                    chol[i * n + j] = val.sqrt();
595                } else {
596                    chol[i * n + j] = (cov_data[i * n + j] - s) / chol[j * n + j];
597                }
598            }
599        }
600
601        // Generate standard normal samples
602        let std_normal = self.standard_normal::<T>(&[size, n])?;
603        let std_normal_data = std_normal.to_vec();
604
605        // Transform samples using Cholesky factor
606        let mut result = vec![T::zero(); size * n];
607
608        for i in 0..size {
609            for j in 0..n {
610                let mut sum = T::zero();
611                for k in 0..n {
612                    if j >= k {
613                        // Cholesky factor is lower triangular
614                        sum = sum + chol[j * n + k] * std_normal_data[i * n + k];
615                    }
616                }
617                result[i * n + j] = means[j] + sum;
618            }
619        }
620
621        Ok(Array::from_vec(result).reshape(&[size, n]))
622    }
623
624    /// Generate a random correlation matrix
625    pub fn random_correlation_matrix<T>(&self, n: usize) -> Result<Array<T>>
626    where
627        T: Float
628            + NumCast
629            + Clone
630            + Debug
631            + Display
632            + scirs2_core::ndarray::distributions::uniform::SampleUniform,
633    {
634        if n < 2 {
635            return Err(crate::error::NumRs2Error::InvalidOperation(
636                "Correlation matrix dimension must be at least 2".to_string(),
637            ));
638        }
639
640        // Generate random matrix with values from a uniform distribution
641        let uniform = self.uniform::<T>(
642            <T as NumCast>::from(-1.0).unwrap_or(T::zero()),
643            <T as NumCast>::from(1.0).unwrap_or(T::zero()),
644            &[n, n],
645        )?;
646
647        let uniform_data = uniform.to_vec();
648
649        // Make the matrix symmetric
650        let mut sym_matrix = vec![T::zero(); n * n];
651        for i in 0..n {
652            for j in 0..n {
653                match i.cmp(&j) {
654                    std::cmp::Ordering::Equal => {
655                        sym_matrix[i * n + j] = <T as NumCast>::from(1.0).unwrap_or(T::zero());
656                    }
657                    std::cmp::Ordering::Less => {
658                        sym_matrix[i * n + j] = uniform_data[i * n + j];
659                        sym_matrix[j * n + i] = uniform_data[i * n + j];
660                    }
661                    std::cmp::Ordering::Greater => {}
662                }
663            }
664        }
665
666        // Convert to nearest correlation matrix using projection
667        // This is a simplified approximation - in practice, more sophisticated
668        // algorithms like the alternating projections method would be used.
669
670        // 1. Set all diagonal elements to 1
671        for i in 0..n {
672            sym_matrix[i * n + i] = <T as NumCast>::from(1.0).unwrap_or(T::zero());
673        }
674
675        // 2. Iteratively adjust non-diagonal elements to ensure positive-definiteness
676        let mut corr_matrix = sym_matrix.clone();
677        let max_iter = 10;
678
679        for _ in 0..max_iter {
680            // Check if matrix is positive definite via Cholesky decomposition
681            let mut is_pd = true;
682            let mut chol = vec![T::zero(); n * n];
683
684            'decomp: for i in 0..n {
685                for j in 0..=i {
686                    let mut s = T::zero();
687                    for k in 0..j {
688                        s = s + chol[i * n + k] * chol[j * n + k];
689                    }
690
691                    if i == j {
692                        let val = corr_matrix[i * n + i] - s;
693                        if val <= T::zero() {
694                            is_pd = false;
695                            break 'decomp;
696                        }
697                        chol[i * n + j] = val.sqrt();
698                    } else {
699                        chol[i * n + j] = (corr_matrix[i * n + j] - s) / chol[j * n + j];
700                    }
701                }
702            }
703
704            if is_pd {
705                break;
706            }
707
708            // If not positive definite, adjust non-diagonal elements
709            let factor = <T as NumCast>::from(0.9).unwrap_or(T::zero());
710            for i in 0..n {
711                for j in 0..n {
712                    if i != j {
713                        corr_matrix[i * n + j] = corr_matrix[i * n + j] * factor;
714                    }
715                }
716            }
717        }
718
719        // Normalize to ensure diagonal is exactly 1.0
720        for i in 0..n {
721            let diag_val = corr_matrix[i * n + i].sqrt();
722            for j in 0..n {
723                corr_matrix[i * n + j] = corr_matrix[i * n + j] / diag_val;
724                corr_matrix[j * n + i] = corr_matrix[j * n + i] / diag_val;
725            }
726        }
727
728        // Final normalization pass
729        for i in 0..n {
730            for j in 0..n {
731                corr_matrix[i * n + j] = corr_matrix[i * n + j]
732                    / (corr_matrix[i * n + i].sqrt() * corr_matrix[j * n + j].sqrt());
733            }
734        }
735
736        // Set diagonal to exactly 1.0
737        for i in 0..n {
738            corr_matrix[i * n + i] = <T as NumCast>::from(1.0).unwrap_or(T::zero());
739        }
740
741        Ok(Array::from_vec(corr_matrix).reshape(&[n, n]))
742    }
743
744    /// Generate random samples from a mixture of distributions
745    pub fn mixture_of_normals<T>(
746        &self,
747        weights: &[T],
748        means: &[T],
749        stds: &[T],
750        shape: &[usize],
751    ) -> Result<Array<T>>
752    where
753        T: Float + NumCast + Clone + Debug + Display,
754    {
755        let n_components = weights.len();
756
757        // Validate inputs
758        if n_components < 1 {
759            return Err(crate::error::NumRs2Error::InvalidOperation(
760                "Mixture must have at least one component".to_string(),
761            ));
762        }
763
764        if means.len() != n_components || stds.len() != n_components {
765            return Err(crate::error::NumRs2Error::InvalidOperation(
766                "Weights, means, and stds must have the same length".to_string(),
767            ));
768        }
769
770        // Check if weights sum to 1
771        let sum_weights: T = weights.iter().fold(T::zero(), |acc, w| acc + *w);
772        if (sum_weights - <T as NumCast>::from(1.0).unwrap_or(T::zero())).abs()
773            > <T as NumCast>::from(1e-6).unwrap_or(T::zero())
774        {
775            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
776                "Weights must sum to 1, got sum={}",
777                sum_weights
778            )));
779        }
780
781        // Check if stds are positive
782        for &std in stds {
783            if std <= T::zero() {
784                return Err(crate::error::NumRs2Error::InvalidOperation(
785                    "Standard deviations must be positive".to_string(),
786                ));
787            }
788        }
789
790        let size: usize = shape.iter().product();
791        let mut vec = Vec::with_capacity(size);
792        let mut rng = self.get_rng()?;
793
794        // Normalize weights to ensure they sum to 1.0
795        let mut norm_weights = Vec::with_capacity(n_components);
796        for &w in weights {
797            norm_weights.push(w / sum_weights);
798        }
799
800        // Compute cumulative weights for component selection
801        let mut cumulative_weights = Vec::with_capacity(n_components);
802        let mut sum = T::zero();
803        for &w in &norm_weights {
804            sum = sum + w;
805            cumulative_weights.push(sum);
806        }
807
808        // Optimized approach: generate component selections and normal samples separately
809        let mut component_selections = Vec::with_capacity(size);
810
811        // Generate all component selections at once
812        for _ in 0..size {
813            let u = <T as NumCast>::from(rng.random::<f64>()).unwrap_or(T::zero());
814            let mut selected_component = 0;
815
816            for (i, &cw) in cumulative_weights.iter().enumerate() {
817                if u <= cw {
818                    selected_component = i;
819                    break;
820                }
821            }
822            component_selections.push(selected_component);
823        }
824
825        // Count how many samples we need from each component
826        let mut component_counts = vec![0usize; n_components];
827        for &comp in &component_selections {
828            component_counts[comp] += 1;
829        }
830
831        // Generate all normal samples for each component in batches
832        let mut component_samples = Vec::with_capacity(n_components);
833        for i in 0..n_components {
834            if component_counts[i] > 0 {
835                let samples = self.normal(means[i], stds[i], &[component_counts[i]])?;
836                component_samples.push(samples.to_vec());
837            } else {
838                component_samples.push(Vec::new());
839            }
840        }
841
842        // Assign samples in the original order
843        let mut component_indices = vec![0usize; n_components];
844        for &selected_component in &component_selections {
845            vec.push(component_samples[selected_component][component_indices[selected_component]]);
846            component_indices[selected_component] += 1;
847        }
848
849        Ok(Array::from_vec(vec).reshape(shape))
850    }
851
852    /// Generate Sobol sequence for quasi-Monte Carlo methods
853    pub fn sobol_sequence<T>(&self, dim: usize, n: usize) -> Result<Array<T>>
854    where
855        T: Float + NumCast + Clone + Debug + Display,
856    {
857        if !(1..=40).contains(&dim) {
858            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
859                "Dimension must be between 1 and 40, got {}",
860                dim
861            )));
862        }
863
864        if n < 1 {
865            return Err(crate::error::NumRs2Error::InvalidOperation(
866                "Number of points must be at least 1".to_string(),
867            ));
868        }
869
870        // Direction numbers (first few values for each dimension)
871        // These are based on Joe & Kuo's direction numbers
872        let direction_numbers: Vec<Vec<u32>> = vec![
873            vec![1],                       // First dimension trivial sequence
874            vec![1, 3],                    // Second dimension
875            vec![1, 3, 5],                 // Third dimension
876            vec![1, 3, 5, 15],             // Fourth dimension
877            vec![1, 3, 5, 15, 17],         // Fifth dimension
878            vec![1, 3, 5, 15, 17, 51],     // Sixth dimension
879            vec![1, 3, 5, 15, 17, 51, 85], // Seventh dimension
880            vec![1, 3, 5, 15, 17, 51, 85, 255], // Eighth dimension
881                                           // Additional dimensions would be added here in a complete implementation
882        ];
883
884        let mut result = vec![T::zero(); n * dim];
885
886        // Generate Sobol sequence
887        for i in 0..n {
888            // Convert i to gray code
889            let g = i ^ (i >> 1);
890
891            for d in 0..std::cmp::min(dim, direction_numbers.len()) {
892                let mut x = 0u32;
893                let mut mask = 1u32;
894
895                for j in 0..32 {
896                    if j < direction_numbers[d].len() && (g & (mask as usize)) != 0 {
897                        x ^= direction_numbers[d][j];
898                    }
899                    mask <<= 1;
900                }
901
902                // Convert to [0,1) range
903                let val = <T as NumCast>::from(x as f64 / 2f64.powi(32)).unwrap_or(T::zero());
904                result[i * dim + d] = val;
905            }
906
907            // For dimensions beyond the provided direction numbers, use random values
908            // This is a fallback - a proper implementation would have all direction numbers
909            let mut rng = self.get_rng()?;
910            for d in direction_numbers.len()..dim {
911                result[i * dim + d] =
912                    <T as NumCast>::from(rng.random::<f64>()).unwrap_or(T::zero());
913            }
914        }
915
916        Ok(Array::from_vec(result).reshape(&[n, dim]))
917    }
918
919    /// Generate Latin Hypercube samples
920    pub fn latin_hypercube<T>(&self, dim: usize, n: usize) -> Result<Array<T>>
921    where
922        T: Float + NumCast + Clone + Debug + Display,
923    {
924        if dim < 1 {
925            return Err(crate::error::NumRs2Error::InvalidOperation(
926                "Dimension must be at least 1".to_string(),
927            ));
928        }
929
930        if n < 2 {
931            return Err(crate::error::NumRs2Error::InvalidOperation(
932                "Number of samples must be at least 2".to_string(),
933            ));
934        }
935
936        let mut result = vec![T::zero(); n * dim];
937
938        // For each dimension, create a permutation of the integers 0 to n-1
939        for d in 0..dim {
940            // Create vector of integers 0 to n-1
941            let mut perm: Vec<usize> = (0..n).collect();
942
943            // Shuffle the vector
944            let mut rng = self.get_rng()?;
945            for i in (1..n).rev() {
946                let j = (rng.random::<f64>() * (i + 1) as f64) as usize;
947                perm.swap(i, j);
948            }
949
950            // Generate uniform random values within each stratum
951            for i in 0..n {
952                let u = rng.random::<f64>();
953                let val =
954                    <T as NumCast>::from((perm[i] as f64 + u) / n as f64).unwrap_or(T::zero());
955                result[i * dim + d] = val;
956            }
957        }
958
959        Ok(Array::from_vec(result).reshape(&[n, dim]))
960    }
961
962    /// Generate copula samples with a specified correlation structure
963    pub fn copula<T>(&self, corr: &Array<T>, n: usize, copula_type: &str) -> Result<Array<T>>
964    where
965        T: Float + NumCast + Clone + Debug + Display,
966    {
967        let corr_shape = corr.shape();
968
969        // Validate correlation matrix
970        if corr_shape.len() != 2 || corr_shape[0] != corr_shape[1] {
971            return Err(crate::error::NumRs2Error::InvalidOperation(
972                "Correlation matrix must be square".to_string(),
973            ));
974        }
975
976        let dim = corr_shape[0];
977
978        // Check if copula_type is supported
979        let valid_types = ["gaussian", "t"];
980        if !valid_types.contains(&copula_type) {
981            return Err(crate::error::NumRs2Error::InvalidOperation(format!(
982                "Unsupported copula type: {}. Supported types: {:?}",
983                copula_type, valid_types
984            )));
985        }
986
987        // Generate correlated normal samples using Cholesky decomposition
988        let means = vec![T::zero(); dim];
989        let mvn_samples = self.multivariate_normal_cholesky(&means, corr, n)?;
990        let mvn_data = mvn_samples.to_vec();
991
992        // Transform to uniform using the CDF
993        let mut result = vec![T::zero(); n * dim];
994
995        for i in 0..n {
996            for d in 0..dim {
997                let z = mvn_data[i * dim + d].to_f64().unwrap_or(0.0);
998
999                // Apply appropriate CDF based on copula type
1000                let u = match copula_type {
1001                    "gaussian" => normal_cdf(z),
1002                    "t" => {
1003                        // T-copula with 4 degrees of freedom
1004                        student_t_cdf(z, 4)
1005                    }
1006                    _ => normal_cdf(z), // Default to Gaussian
1007                };
1008
1009                result[i * dim + d] = <T as NumCast>::from(u).unwrap_or(T::zero());
1010            }
1011        }
1012
1013        Ok(Array::from_vec(result).reshape(&[n, dim]))
1014    }
1015}
1016
1017// Helper functions for probability distributions
1018
1019/// Normal cumulative distribution function
1020fn normal_cdf(x: f64) -> f64 {
1021    0.5 * (1.0 + erf(x / std::f64::consts::SQRT_2))
1022}
1023
1024/// Inverse normal cumulative distribution function
1025#[allow(dead_code)]
1026fn normal_inv_cdf(p: f64) -> f64 {
1027    if p <= 0.0 {
1028        return f64::NEG_INFINITY;
1029    }
1030    if p >= 1.0 {
1031        return f64::INFINITY;
1032    }
1033
1034    // Approximate the inverse CDF using the Beasley-Springer-Moro algorithm
1035    let q = p - 0.5;
1036
1037    if q.abs() <= 0.425 {
1038        // Central region
1039        let r = 0.180625 - q * q;
1040        return q
1041            * (((((((2.509_080_928_730_122_6 * r + 3.343_057_558_358_813e1) * r
1042                + 6.726_577_092_700_87e1)
1043                * r
1044                + 4.592_195_393_154_987e1)
1045                * r
1046                + 1.373_169_376_550_946_2e1)
1047                * r
1048                + 1.421_413_764_013_155_7)
1049                * r
1050                + 2.298_979_990_914_786_5e-1)
1051                / (((((((4.374_317_029_667_823e-2 * r + 3.739_716_869_366_193_3) * r
1052                    + 4.692_163_145_304_143_5e1)
1053                    * r
1054                    + 2.266_863_181_546_454_5e2)
1055                    * r
1056                    + 5.396_173_702_892_064e2)
1057                    * r
1058                    + 6.573_191_171_972_302e2)
1059                    * r
1060                    + 3.734_237_715_407_137e2)
1061                    * r
1062                    + 1.0));
1063    }
1064
1065    // Tail regions
1066    let r = if q > 0.0 { 1.0 - p } else { p };
1067
1068    if r <= 0.0 {
1069        return if q > 0.0 {
1070            f64::INFINITY
1071        } else {
1072            f64::NEG_INFINITY
1073        };
1074    }
1075
1076    let r = (-r.ln()).sqrt();
1077
1078    let mut ret = ((((((1.811_625_079_763_736_7e1 * r + 7.928_172_819_374_223e1) * r
1079        + 1.373_169_376_550_946_2e2)
1080        * r
1081        + 1.193_147_912_264_617e2)
1082        * r
1083        + 4.926_845_824_098_105e1)
1084        * r
1085        + 8.400_547_514_910_246)
1086        * r
1087        + 3.050_789_888_729_818e-1)
1088        / ((((((3.020_637_025_121_939_4e-1 * r + 5.595_303_579_120_197) * r
1089            + 3.042_975_173_014_595e1)
1090            * r
1091            + 6.493_763_419_991_991e1)
1092            * r
1093            + 5.786_293_056_261_984e1)
1094            * r
1095            + 2.121_379_430_158_66e1)
1096            * r
1097            + 2.659_135_201_941_675)
1098        * r
1099        + 1.0;
1100
1101    ret = if q < 0.0 { -ret } else { ret };
1102    ret
1103}
1104
1105/// Error function approximation
1106fn erf(x: f64) -> f64 {
1107    // Constants for Abramowitz and Stegun approximation 7.1.26
1108    let a1 = 0.254829592;
1109    let a2 = -0.284496736;
1110    let a3 = 1.421413741;
1111    let a4 = -1.453152027;
1112    let a5 = 1.061405429;
1113    let p = 0.3275911;
1114
1115    let sign = if x < 0.0 { -1.0 } else { 1.0 };
1116    let x = x.abs();
1117
1118    let t = 1.0 / (1.0 + p * x);
1119    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
1120
1121    sign * y
1122}
1123
1124/// Student's t cumulative distribution function
1125fn student_t_cdf(x: f64, df: usize) -> f64 {
1126    // Simplified approximation
1127    if x == 0.0 {
1128        return 0.5;
1129    }
1130
1131    // For large df, t-distribution approaches normal distribution
1132    if df > 100 {
1133        return normal_cdf(x);
1134    }
1135
1136    // Simple approximation using the regularized incomplete beta function
1137    let df_f64 = df as f64;
1138    let t = x / (df_f64 + x * x).sqrt();
1139    let u = 0.5 + 0.5 * t * (1.0 - t * t / (df_f64 + 2.0)).sqrt();
1140
1141    // Clamp to valid probability range
1142    u.clamp(0.0, 1.0)
1143}
1144
1145// Enhanced distributions directly exported by the parent module, no need to re-export here
1146
1147// Unit tests for enhanced distributions
1148#[cfg(test)]
1149mod tests {
1150    use super::*;
1151
1152    #[test]
1153    fn test_truncated_normal() {
1154        let mean = 0.0;
1155        let std = 1.0;
1156        let low = -2.0;
1157        let high = 2.0;
1158
1159        // Generate truncated normal samples (reduced size for performance)
1160        let samples = truncated_normal(mean, std, low, high, &[100])
1161            .expect("test: truncated_normal should succeed");
1162        let data = samples.to_vec();
1163
1164        // Check bounds
1165        for &val in &data {
1166            assert!(val >= low && val <= high, "Value outside bounds: {}", val);
1167        }
1168
1169        // Check statistics (should be approximately normal within bounds)
1170        let mean_val: f64 = data.iter().sum::<f64>() / data.len() as f64;
1171        assert!(
1172            (mean_val - mean).abs() < 0.5,
1173            "Mean too far from expected: {}",
1174            mean_val
1175        );
1176    }
1177
1178    #[test]
1179    fn test_vonmises() {
1180        let mu = 0.0;
1181        let kappa = 2.0;
1182
1183        // Generate von Mises samples
1184        let samples = vonmises(mu, kappa, &[1000]).expect("test: vonmises should succeed");
1185        let data = samples.to_vec();
1186
1187        // Check bounds (should be in [-π, π))
1188        for &val in &data {
1189            assert!(
1190                (-std::f64::consts::PI..std::f64::consts::PI).contains(&val),
1191                "Value outside bounds: {}",
1192                val
1193            );
1194        }
1195
1196        // Calculate circular mean
1197        let sum_sin: f64 = data.iter().map(|&x| x.sin()).sum();
1198        let sum_cos: f64 = data.iter().map(|&x| x.cos()).sum();
1199        let mean_angle = sum_sin.atan2(sum_cos);
1200
1201        // For von Mises, mean direction should be close to mu
1202        let angle_diff = (mean_angle - mu + std::f64::consts::PI) % (2.0 * std::f64::consts::PI)
1203            - std::f64::consts::PI;
1204        assert!(
1205            angle_diff.abs() < 0.5,
1206            "Mean direction too far from expected: {}",
1207            mean_angle
1208        );
1209    }
1210
1211    #[test]
1212    fn test_latin_hypercube() {
1213        let dim = 2;
1214        let n = 10;
1215
1216        // Generate Latin Hypercube samples
1217        let samples = latin_hypercube::<f64>(dim, n).expect("test: latin_hypercube should succeed");
1218        let data = samples.to_vec();
1219
1220        // Check that each dimension has one sample in each stratum
1221        for d in 0..dim {
1222            let mut counts = vec![0; n];
1223
1224            for i in 0..n {
1225                let val = data[i * dim + d];
1226                let stratum = (val * n as f64).floor() as usize;
1227                let stratum = std::cmp::min(stratum, n - 1); // Handle edge case for val=1.0
1228                counts[stratum] += 1;
1229            }
1230
1231            // Each stratum should have exactly one sample
1232            for (s, &count) in counts.iter().enumerate() {
1233                assert_eq!(count, 1, "Stratum {} has {} samples, expected 1", s, count);
1234            }
1235        }
1236    }
1237
1238    #[test]
1239    #[ignore = "Performance optimization needed - function works but is too slow for CI"]
1240    fn test_mixture_of_normals() {
1241        let weights = vec![0.3, 0.7];
1242        let means = vec![-3.0, 3.0];
1243        let stds = vec![1.0, 1.0];
1244
1245        // Generate mixture samples (reduced size for performance)
1246        let samples = mixture_of_normals(&weights, &means, &stds, &[100])
1247            .expect("test: mixture_of_normals should succeed");
1248
1249        let data = samples.to_vec();
1250
1251        // Check that distribution is bimodal
1252        // Sort the data for percentile calculation
1253        let mut sorted_data = data.clone();
1254        sorted_data.sort_by(|a, b| {
1255            a.partial_cmp(b)
1256                .expect("test: f64 comparison should succeed for non-NaN values")
1257        });
1258
1259        // Calculate 25th and 75th percentiles
1260        let p25 = sorted_data[(0.25 * sorted_data.len() as f64) as usize];
1261        let p75 = sorted_data[(0.75 * sorted_data.len() as f64) as usize];
1262
1263        // For a bimodal distribution with these parameters,
1264        // we expect the 25th percentile to be negative and
1265        // the 75th percentile to be positive
1266        assert!(p25 < 0.0, "25th percentile should be negative, got {}", p25);
1267        assert!(p75 > 0.0, "75th percentile should be positive, got {}", p75);
1268    }
1269}
numrs2/random/distributions_enhanced.rs

numrs2/random/
distributions_enhanced.rs