Skip to main content

scirs2_core/random/
scientific.rs

1//! Scientific computing workflows for reproducible research
2//!
3//! This module provides high-level interfaces specifically designed for scientific computing
4//! and research applications where reproducibility, statistical validity, and advanced
5//! sampling methods are critical.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use scirs2_core::random::scientific::*;
11//!
12//! // Reproducible experiments
13//! let mut experiment = ReproducibleExperiment::new(42);
14//! let sample1 = experiment.next_sample(1000, StandardNormal);
15//! let sample2 = experiment.next_sample(1000, StandardNormal);
16//!
17//! // Statistical sampling
18//! let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
19//! let bootstrap = bootstrap_sample(&data, 1000, 500);
20//! let jackknife = jackknife_samples(&data);
21//!
22//! // Monte Carlo with variance reduction
23//! let mut mc = MonteCarloSampler::with_antithetic_variates(12345);
24//! let samples = mc.generate_correlated_pairs(1000);
25//! ```
26
27use crate::random::{
28    arrays::{random_normal_array, random_uniform_array, OptimizedArrayRandom},
29    core::{
30        scientific::{DeterministicState, ReproducibleSequence},
31        seeded_rng, Random,
32    },
33    qmc::{HaltonGenerator, LatinHypercubeSampler, LowDiscrepancySequence, SobolGenerator},
34    variance_reduction::{AntitheticSampling, ControlVariate},
35    ParallelRng, ThreadLocalRngPool,
36};
37use ::ndarray::{Array, Array1, Array2, Ix2};
38use rand::{Rng, RngExt};
39use rand_distr::{Distribution, Normal, Uniform};
40use std::collections::HashMap;
41
42/// Standard normal distribution (mean=0, std=1) for convenience
43#[derive(Debug, Clone, Copy)]
44pub struct StandardNormal;
45
46impl Distribution<f64> for StandardNormal {
47    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
48        Normal::new(0.0, 1.0).expect("Operation failed").sample(rng)
49    }
50}
51
52/// Standard uniform distribution [0, 1) for convenience
53#[derive(Debug, Clone, Copy)]
54pub struct StandardUniform;
55
56impl Distribution<f64> for StandardUniform {
57    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
58        Uniform::new(0.0, 1.0)
59            .expect("Operation failed")
60            .sample(rng)
61    }
62}
63
64/// Reproducible experiment manager for scientific research
65pub struct ReproducibleExperiment {
66    sequence: ReproducibleSequence,
67    state: DeterministicState,
68}
69
70impl ReproducibleExperiment {
71    /// Create a new reproducible experiment with a seed
72    pub fn new(seed: u64) -> Self {
73        Self {
74            sequence: ReproducibleSequence::new(seed),
75            state: DeterministicState::new(seed),
76        }
77    }
78
79    /// Generate the next sample in the reproducible sequence
80    pub fn next_sample<D: Distribution<f64> + Copy>(
81        &mut self,
82        size: usize,
83        distribution: D,
84    ) -> Vec<f64> {
85        let mut rng = self.state.next_rng();
86        rng.sample_vec(distribution, size)
87    }
88
89    /// Generate the next array sample
90    pub fn next_array_sample<D: Distribution<f64> + Copy>(
91        &mut self,
92        shape: [usize; 2],
93        distribution: D,
94    ) -> Array2<f64> {
95        let mut rng = self.state.next_rng();
96        Array::random_bulk(Ix2(shape[0], shape[1]), distribution, &mut rng)
97    }
98
99    /// Reset the experiment to its initial state
100    pub fn reset(&mut self) {
101        self.sequence.reset();
102        self.state = DeterministicState::new(self.state.current_state().0);
103    }
104
105    /// Get current experiment state for logging/checkpointing
106    pub fn current_state(&self) -> (u64, u64) {
107        self.state.current_state()
108    }
109}
110
111/// Bootstrap sampling for statistical inference
112pub fn bootstrap_sample<T: Clone + Send + Sync>(
113    data: &[T],
114    n_bootstrap: usize,
115    sample_size: usize,
116) -> Vec<Vec<T>> {
117    let pool = ThreadLocalRngPool::new(42);
118    ParallelRng::parallel_bootstrap(data, n_bootstrap, &pool)
119        .into_iter()
120        .map(|sample| sample.into_iter().take(sample_size).collect())
121        .collect()
122}
123
124/// Jackknife resampling (leave-one-out)
125pub fn jackknife_samples<T: Clone>(data: &[T]) -> Vec<Vec<T>> {
126    (0..data.len())
127        .map(|i| {
128            data.iter()
129                .enumerate()
130                .filter(|(idx, _)| *idx != i)
131                .map(|(_, item)| item.clone())
132                .collect()
133        })
134        .collect()
135}
136
137/// Cross-validation splits
138pub fn cross_validation_splits<T: Clone>(
139    data: &[T],
140    k_folds: usize,
141    seed: u64,
142) -> Vec<(Vec<T>, Vec<T>)> {
143    let mut rng = seeded_rng(seed);
144    let mut indices: Vec<usize> = (0..data.len()).collect();
145
146    // Shuffle indices for random splits
147    use rand::seq::SliceRandom;
148    indices.shuffle(&mut rng.rng);
149
150    let fold_size = data.len() / k_folds;
151
152    (0..k_folds)
153        .map(|fold| {
154            let test_start = fold * fold_size;
155            let test_end = if fold == k_folds - 1 {
156                data.len()
157            } else {
158                test_start + fold_size
159            };
160
161            let test_indices = &indices[test_start..test_end];
162            let train_indices: Vec<usize> = indices
163                .iter()
164                .filter(|&&idx| !test_indices.contains(&idx))
165                .copied()
166                .collect();
167
168            let train_data = train_indices.iter().map(|&i| data[i].clone()).collect();
169            let test_data = test_indices.iter().map(|&i| data[i].clone()).collect();
170
171            (train_data, test_data)
172        })
173        .collect()
174}
175
176/// Monte Carlo sampler with variance reduction techniques
177pub struct MonteCarloSampler {
178    antithetic: Option<AntitheticSampling<rand::rngs::StdRng>>,
179    control_variate: Option<ControlVariate>,
180    base_seed: u64,
181}
182
183impl MonteCarloSampler {
184    /// Create a basic Monte Carlo sampler
185    pub fn new(seed: u64) -> Self {
186        Self {
187            antithetic: None,
188            control_variate: None,
189            base_seed: seed,
190        }
191    }
192
193    /// Create a sampler with antithetic variates for variance reduction
194    pub fn with_antithetic_variates(seed: u64) -> Self {
195        Self {
196            antithetic: Some(AntitheticSampling::new(seeded_rng(seed))),
197            control_variate: None,
198            base_seed: seed,
199        }
200    }
201
202    /// Create a sampler with control variates
203    pub fn with_control_variate(seed: u64, control_mean: f64) -> Self {
204        Self {
205            antithetic: None,
206            control_variate: Some(ControlVariate::new(control_mean)),
207            base_seed: seed,
208        }
209    }
210
211    /// Generate correlated sample pairs for variance reduction
212    pub fn generate_correlated_pairs(&mut self, count: usize) -> (Vec<f64>, Vec<f64>) {
213        if let Some(ref mut antithetic) = self.antithetic {
214            antithetic.generate_antithetic_pairs(count)
215        } else {
216            let mut rng = seeded_rng(self.base_seed);
217            let samples1: Vec<f64> = (0..count)
218                .map(|_| rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")))
219                .collect();
220            let samples2: Vec<f64> = (0..count)
221                .map(|_| rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")))
222                .collect();
223            (samples1, samples2)
224        }
225    }
226
227    /// Generate stratified samples for more uniform coverage
228    pub fn stratified_samples(&mut self, strata: usize, samples_per_stratum: usize) -> Vec<f64> {
229        if let Some(ref mut antithetic) = self.antithetic {
230            antithetic.stratified_samples(strata, samples_per_stratum)
231        } else {
232            let mut rng = seeded_rng(self.base_seed);
233            let mut all_samples = Vec::new();
234
235            for i in 0..strata {
236                let stratum_start = i as f64 / strata as f64;
237                let stratum_end = (i + 1) as f64 / strata as f64;
238
239                for _ in 0..samples_per_stratum {
240                    let u = rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed"));
241                    let sample = stratum_start + u * (stratum_end - stratum_start);
242                    all_samples.push(sample);
243                }
244            }
245
246            all_samples
247        }
248    }
249}
250
251/// Quasi-Monte Carlo sampling for better coverage
252pub struct QuasiMonteCarloSampler {
253    sobol: Option<SobolGenerator>,
254    halton: Option<HaltonGenerator>,
255    lhs: Option<LatinHypercubeSampler<rand::rngs::ThreadRng>>,
256    dimensions: usize,
257}
258
259impl QuasiMonteCarloSampler {
260    /// Create a Sobol sequence sampler
261    pub fn sobol(dimensions: usize) -> Result<Self, String> {
262        Ok(Self {
263            sobol: Some(SobolGenerator::dimension(dimensions).map_err(|e| e.to_string())?),
264            halton: None,
265            lhs: None,
266            dimensions,
267        })
268    }
269
270    /// Create a Halton sequence sampler
271    pub fn halton(dimensions: usize) -> Self {
272        Self {
273            sobol: None,
274            halton: Some(HaltonGenerator::new(
275                &(2u32..2u32 + dimensions as u32).collect::<Vec<_>>(),
276            )),
277            lhs: None,
278            dimensions,
279        }
280    }
281
282    /// Create a Latin Hypercube sampler
283    pub fn latin_hypercube(dimensions: usize) -> Self {
284        Self {
285            sobol: None,
286            halton: None,
287            lhs: Some(LatinHypercubeSampler::<rand::rngs::ThreadRng>::new(
288                dimensions,
289            )),
290            dimensions,
291        }
292    }
293
294    /// Generate quasi-random points
295    pub fn generate_points(&mut self, count: usize) -> Result<Vec<Vec<f64>>, String> {
296        if let Some(ref mut sobol) = self.sobol {
297            let array = sobol.generate(count);
298            let result: Vec<Vec<f64>> = array.rows().into_iter().map(|row| row.to_vec()).collect();
299            Ok(result)
300        } else if let Some(ref mut halton) = self.halton {
301            let array = halton.generate(count);
302            let result: Vec<Vec<f64>> = array.rows().into_iter().map(|row| row.to_vec()).collect();
303            Ok(result)
304        } else if let Some(ref mut lhs) = self.lhs {
305            let array = lhs.sample(count).map_err(|e| e.to_string())?;
306            Ok((0..count)
307                .map(|i| (0..self.dimensions).map(|j| array[[i, j]]).collect())
308                .collect())
309        } else {
310            Err("No QMC generator configured".to_string())
311        }
312    }
313}
314
315/// Design of experiments helper
316pub struct ExperimentalDesign;
317
318impl ExperimentalDesign {
319    /// Generate a factorial design
320    pub fn factorial_design(factors: &[Vec<f64>]) -> Vec<Vec<f64>> {
321        if factors.is_empty() {
322            return vec![vec![]];
323        }
324
325        let mut designs = vec![vec![]];
326
327        for factor_levels in factors {
328            let mut new_designs = Vec::new();
329            for design in &designs {
330                for &level in factor_levels {
331                    let mut new_design = design.clone();
332                    new_design.push(level);
333                    new_designs.push(new_design);
334                }
335            }
336            designs = new_designs;
337        }
338
339        designs
340    }
341
342    /// Generate a random fractional factorial design
343    pub fn fractional_factorial_design(
344        factors: &[Vec<f64>],
345        fraction: f64,
346        seed: u64,
347    ) -> Vec<Vec<f64>> {
348        let full_design = Self::factorial_design(factors);
349        let sample_size = (full_design.len() as f64 * fraction).ceil() as usize;
350
351        let mut rng = seeded_rng(seed);
352        use rand::seq::SliceRandom;
353        let mut sampled_design = full_design;
354        sampled_design.shuffle(&mut rng.rng);
355        sampled_design.truncate(sample_size);
356
357        sampled_design
358    }
359
360    /// Generate a central composite design
361    pub fn central_composite_design(dimensions: usize, alpha: f64) -> Vec<Vec<f64>> {
362        let mut design = Vec::new();
363
364        // Factorial points (corners of hypercube)
365        let factorial_factors: Vec<Vec<f64>> = (0..dimensions).map(|_| vec![-1.0, 1.0]).collect();
366        design.extend(Self::factorial_design(&factorial_factors));
367
368        // Axial points
369        for dim in 0..dimensions {
370            let mut point_pos = vec![0.0; dimensions];
371            let mut point_neg = vec![0.0; dimensions];
372            point_pos[dim] = alpha;
373            point_neg[dim] = -alpha;
374            design.push(point_pos);
375            design.push(point_neg);
376        }
377
378        // Center point
379        design.push(vec![0.0; dimensions]);
380
381        design
382    }
383
384    /// Generate a Latin Hypercube Sampling (LHS) design.
385    ///
386    /// LHS is a stratified sampling method that produces samples that are
387    /// well-distributed across each dimension's range. For each of the `D`
388    /// dimensions, the unit interval `[0, 1)` is split into `N` equal strata
389    /// and exactly one sample is drawn from each stratum; the per-dimension
390    /// samples are then independently shuffled to randomize cross-dimensional
391    /// pairings. Each `[0, 1)` sample is finally mapped to a discrete factor
392    /// level by index `(u * K) as usize` clamped to `K - 1`, where `K` is the
393    /// number of levels of that factor.
394    ///
395    /// # Arguments
396    ///
397    /// * `factors` - Per-factor level vectors (each `factors[d]` is the set of
398    ///   discrete levels for dimension `d`).
399    /// * `n_points` - Number of LHS points. When `None`, defaults to the
400    ///   maximum number of levels across all factors so that every level of
401    ///   every factor is visited at least once.
402    /// * `seed` - Deterministic RNG seed.
403    ///
404    /// # Returns
405    ///
406    /// A vector of `n_points` design points; each point is a `Vec<f64>` of
407    /// length `factors.len()` whose entries are drawn from the respective
408    /// factor's level set. Returns a single empty point when `factors` is
409    /// empty (matching the behavior of `factorial_design`), and an empty
410    /// vector when the resolved `n` is zero.
411    pub fn latin_hypercube_design(
412        factors: &[Vec<f64>],
413        n_points: Option<usize>,
414        seed: u64,
415    ) -> Vec<Vec<f64>> {
416        // Match factorial_design's edge case for empty factors.
417        if factors.is_empty() {
418            return vec![vec![]];
419        }
420
421        // Refuse zero-level factors — there is nothing meaningful to sample
422        // along that dimension. Returning an empty design preserves the
423        // function's `Vec<Vec<f64>>` signature without panicking.
424        if factors.iter().any(|f| f.is_empty()) {
425            return Vec::new();
426        }
427
428        // Default N: max factor level count, so each level of each factor
429        // appears at least once across the strata.
430        let n =
431            n_points.unwrap_or_else(|| factors.iter().map(|f| f.len()).max().unwrap_or(1).max(1));
432
433        if n == 0 {
434            return Vec::new();
435        }
436
437        let dimensions = factors.len();
438
439        // Reuse the QMC LHS sampler (Fisher–Yates shuffle + per-stratum
440        // uniform jitter, see `crate::random::qmc::LatinHypercubeSampler`).
441        let mut sampler =
442            LatinHypercubeSampler::<rand::prelude::ThreadRng>::with_seed(dimensions, seed);
443
444        // The QMC sampler can fail only when n == 0 (already handled). Use
445        // `unwrap_or_else` rather than `unwrap()` to honor the no-unwrap
446        // policy.
447        let unit_points = sampler
448            .sample(n)
449            .unwrap_or_else(|_| ::ndarray::Array2::<f64>::zeros((n, dimensions)));
450
451        // Map each [0, 1) coordinate to the corresponding factor level.
452        let mut design_points = Vec::with_capacity(n);
453        for i in 0..n {
454            let mut point = Vec::with_capacity(dimensions);
455            for (d, factor) in factors.iter().enumerate() {
456                let k = factor.len();
457                // The QMC sampler guarantees u in [0, 1); clamp the resulting
458                // index to `k - 1` to defensively handle any edge case where
459                // `u * k as f64` rounds to `k`.
460                let mut idx = (unit_points[[i, d]] * k as f64) as usize;
461                if idx >= k {
462                    idx = k - 1;
463                }
464                point.push(factor[idx]);
465            }
466            design_points.push(point);
467        }
468
469        design_points
470    }
471}
472
473/// A/B testing utilities
474pub mod ab_testing {
475    use super::*;
476
477    /// Split data into A/B test groups
478    pub fn split_ab_groups<T: Clone>(data: &[T], split_ratio: f64, seed: u64) -> (Vec<T>, Vec<T>) {
479        let mut rng = seeded_rng(seed);
480
481        let mut group_a = Vec::new();
482        let mut group_b = Vec::new();
483
484        for item in data {
485            if rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")) < split_ratio {
486                group_a.push(item.clone());
487            } else {
488                group_b.push(item.clone());
489            }
490        }
491
492        (group_a, group_b)
493    }
494
495    /// Generate balanced A/B test assignments
496    pub fn balanced_assignment(user_ids: &[String], seed: u64) -> HashMap<String, String> {
497        let mut rng = seeded_rng(seed);
498        let mut assignments = HashMap::new();
499
500        let mut shuffled_ids = user_ids.to_vec();
501        use rand::seq::SliceRandom;
502        shuffled_ids.shuffle(&mut rng.rng);
503
504        for (i, user_id) in shuffled_ids.iter().enumerate() {
505            let group = if i % 2 == 0 { "A" } else { "B" };
506            assignments.insert(user_id.clone(), group.to_string());
507        }
508
509        assignments
510    }
511
512    /// Multi-arm bandit assignment with Thompson sampling
513    pub fn thompson_sampling_assignment(
514        arms: &[String],
515        successes: &[u32],
516        failures: &[u32],
517        seed: u64,
518    ) -> String {
519        use crate::random::distributions::Beta;
520
521        let mut rng = seeded_rng(seed);
522        let mut max_sample = 0.0;
523        let mut best_arm = arms[0].clone();
524
525        for (i, arm) in arms.iter().enumerate() {
526            let alpha = successes[i] as f64 + 1.0;
527            let beta_param = failures[i] as f64 + 1.0;
528
529            if let Ok(beta_dist) = Beta::new(alpha, beta_param) {
530                let sample = beta_dist.sample(&mut rng);
531                if sample > max_sample {
532                    max_sample = sample;
533                    best_arm = arm.clone();
534                }
535            }
536        }
537
538        best_arm
539    }
540}
541
542#[cfg(test)]
543mod lhs_tests {
544    use super::*;
545
546    fn make_factors() -> Vec<Vec<f64>> {
547        vec![
548            vec![0.0, 1.0, 2.0, 3.0],
549            vec![10.0, 20.0],
550            vec![-5.0, 0.0, 5.0],
551        ]
552    }
553
554    #[test]
555    fn test_latin_hypercube_default_n_uses_max_levels() {
556        let factors = make_factors();
557        let design = ExperimentalDesign::latin_hypercube_design(&factors, None, 7);
558
559        // Default N = max(K) = 4 (first factor has 4 levels).
560        assert_eq!(design.len(), 4);
561        for point in &design {
562            assert_eq!(point.len(), factors.len());
563            // Every coordinate must come from the corresponding factor's levels.
564            for (d, value) in point.iter().enumerate() {
565                assert!(
566                    factors[d].iter().any(|level| (level - value).abs() < 1e-12),
567                    "value {} for dim {} not in factor levels",
568                    value,
569                    d,
570                );
571            }
572        }
573    }
574
575    #[test]
576    fn test_latin_hypercube_explicit_n_points() {
577        let factors = make_factors();
578        let n = 12;
579        let design = ExperimentalDesign::latin_hypercube_design(&factors, Some(n), 11);
580        assert_eq!(design.len(), n);
581        for point in &design {
582            assert_eq!(point.len(), factors.len());
583        }
584    }
585
586    #[test]
587    fn test_latin_hypercube_determinism_same_seed() {
588        let factors = make_factors();
589        let a = ExperimentalDesign::latin_hypercube_design(&factors, Some(8), 1234);
590        let b = ExperimentalDesign::latin_hypercube_design(&factors, Some(8), 1234);
591        assert_eq!(a, b);
592    }
593
594    #[test]
595    fn test_latin_hypercube_different_seeds_diverge() {
596        let factors = make_factors();
597        let a = ExperimentalDesign::latin_hypercube_design(&factors, Some(16), 42);
598        let b = ExperimentalDesign::latin_hypercube_design(&factors, Some(16), 43);
599        assert_ne!(a, b);
600    }
601
602    #[test]
603    fn test_latin_hypercube_stratification() {
604        // For N == K_d on every factor, every level must appear at least once
605        // along that factor — this is the LHS guarantee (one sample per stratum).
606        // Use uniform K so the default N = K satisfies the precondition.
607        let factors: Vec<Vec<f64>> = (0..3).map(|_| vec![0.0, 1.0, 2.0, 3.0, 4.0]).collect();
608        let design = ExperimentalDesign::latin_hypercube_design(&factors, None, 99);
609        assert_eq!(design.len(), 5);
610
611        for d in 0..3 {
612            let mut seen = vec![false; 5];
613            for point in &design {
614                let level_idx = factors[d]
615                    .iter()
616                    .position(|level| (level - point[d]).abs() < 1e-12)
617                    .expect("each LHS coordinate must map back to a factor level");
618                seen[level_idx] = true;
619            }
620            assert!(
621                seen.iter().all(|s| *s),
622                "stratification failed on dim {}: {:?}",
623                d,
624                seen,
625            );
626        }
627    }
628
629    #[test]
630    fn test_latin_hypercube_empty_factors() {
631        let design = ExperimentalDesign::latin_hypercube_design(&[], None, 1);
632        assert_eq!(design, vec![Vec::<f64>::new()]);
633    }
634
635    #[test]
636    fn test_latin_hypercube_zero_level_factor_returns_empty() {
637        let factors: Vec<Vec<f64>> = vec![vec![0.0, 1.0], Vec::new()];
638        let design = ExperimentalDesign::latin_hypercube_design(&factors, Some(4), 1);
639        assert!(design.is_empty());
640    }
641}