scirs2_core/random/
scientific.rs

1//! Scientific computing workflows for reproducible research
2//!
3//! This module provides high-level interfaces specifically designed for scientific computing
4//! and research applications where reproducibility, statistical validity, and advanced
5//! sampling methods are critical.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use scirs2_core::random::scientific::*;
11//!
12//! // Reproducible experiments
13//! let mut experiment = ReproducibleExperiment::new(42);
14//! let sample1 = experiment.next_sample(1000, StandardNormal);
15//! let sample2 = experiment.next_sample(1000, StandardNormal);
16//!
17//! // Statistical sampling
18//! let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
19//! let bootstrap = bootstrap_sample(&data, 1000, 500);
20//! let jackknife = jackknife_samples(&data);
21//!
22//! // Monte Carlo with variance reduction
23//! let mut mc = MonteCarloSampler::with_antithetic_variates(12345);
24//! let samples = mc.generate_correlated_pairs(1000);
25//! ```
26
27use crate::random::{
28    arrays::{random_normal_array, random_uniform_array, OptimizedArrayRandom},
29    core::{
30        scientific::{DeterministicState, ReproducibleSequence},
31        seeded_rng, Random,
32    },
33    qmc::{HaltonGenerator, LatinHypercubeSampler, LowDiscrepancySequence, SobolGenerator},
34    variance_reduction::{AntitheticSampling, ControlVariate},
35    ParallelRng, ThreadLocalRngPool,
36};
37use ::ndarray::{Array, Array1, Array2, Ix2};
38use rand::Rng;
39use rand_distr::{Distribution, Normal, Uniform};
40use std::collections::HashMap;
41
42/// Standard normal distribution (mean=0, std=1) for convenience
43#[derive(Debug, Clone, Copy)]
44pub struct StandardNormal;
45
46impl Distribution<f64> for StandardNormal {
47    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
48        Normal::new(0.0, 1.0).expect("Operation failed").sample(rng)
49    }
50}
51
52/// Standard uniform distribution [0, 1) for convenience
53#[derive(Debug, Clone, Copy)]
54pub struct StandardUniform;
55
56impl Distribution<f64> for StandardUniform {
57    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> f64 {
58        Uniform::new(0.0, 1.0)
59            .expect("Operation failed")
60            .sample(rng)
61    }
62}
63
64/// Reproducible experiment manager for scientific research
65pub struct ReproducibleExperiment {
66    sequence: ReproducibleSequence,
67    state: DeterministicState,
68}
69
70impl ReproducibleExperiment {
71    /// Create a new reproducible experiment with a seed
72    pub fn new(seed: u64) -> Self {
73        Self {
74            sequence: ReproducibleSequence::new(seed),
75            state: DeterministicState::new(seed),
76        }
77    }
78
79    /// Generate the next sample in the reproducible sequence
80    pub fn next_sample<D: Distribution<f64> + Copy>(
81        &mut self,
82        size: usize,
83        distribution: D,
84    ) -> Vec<f64> {
85        let mut rng = self.state.next_rng();
86        rng.sample_vec(distribution, size)
87    }
88
89    /// Generate the next array sample
90    pub fn next_array_sample<D: Distribution<f64> + Copy>(
91        &mut self,
92        shape: [usize; 2],
93        distribution: D,
94    ) -> Array2<f64> {
95        let mut rng = self.state.next_rng();
96        Array::random_bulk(Ix2(shape[0], shape[1]), distribution, &mut rng)
97    }
98
99    /// Reset the experiment to its initial state
100    pub fn reset(&mut self) {
101        self.sequence.reset();
102        self.state = DeterministicState::new(self.state.current_state().0);
103    }
104
105    /// Get current experiment state for logging/checkpointing
106    pub fn current_state(&self) -> (u64, u64) {
107        self.state.current_state()
108    }
109}
110
111/// Bootstrap sampling for statistical inference
112pub fn bootstrap_sample<T: Clone + Send + Sync>(
113    data: &[T],
114    n_bootstrap: usize,
115    sample_size: usize,
116) -> Vec<Vec<T>> {
117    let pool = ThreadLocalRngPool::new(42);
118    ParallelRng::parallel_bootstrap(data, n_bootstrap, &pool)
119        .into_iter()
120        .map(|sample| sample.into_iter().take(sample_size).collect())
121        .collect()
122}
123
124/// Jackknife resampling (leave-one-out)
125pub fn jackknife_samples<T: Clone>(data: &[T]) -> Vec<Vec<T>> {
126    (0..data.len())
127        .map(|i| {
128            data.iter()
129                .enumerate()
130                .filter(|(idx, _)| *idx != i)
131                .map(|(_, item)| item.clone())
132                .collect()
133        })
134        .collect()
135}
136
137/// Cross-validation splits
138pub fn cross_validation_splits<T: Clone>(
139    data: &[T],
140    k_folds: usize,
141    seed: u64,
142) -> Vec<(Vec<T>, Vec<T>)> {
143    let mut rng = seeded_rng(seed);
144    let mut indices: Vec<usize> = (0..data.len()).collect();
145
146    // Shuffle indices for random splits
147    use rand::seq::SliceRandom;
148    indices.shuffle(&mut rng.rng);
149
150    let fold_size = data.len() / k_folds;
151
152    (0..k_folds)
153        .map(|fold| {
154            let test_start = fold * fold_size;
155            let test_end = if fold == k_folds - 1 {
156                data.len()
157            } else {
158                test_start + fold_size
159            };
160
161            let test_indices = &indices[test_start..test_end];
162            let train_indices: Vec<usize> = indices
163                .iter()
164                .filter(|&&idx| !test_indices.contains(&idx))
165                .copied()
166                .collect();
167
168            let train_data = train_indices.iter().map(|&i| data[i].clone()).collect();
169            let test_data = test_indices.iter().map(|&i| data[i].clone()).collect();
170
171            (train_data, test_data)
172        })
173        .collect()
174}
175
176/// Monte Carlo sampler with variance reduction techniques
177pub struct MonteCarloSampler {
178    antithetic: Option<AntitheticSampling<rand::rngs::StdRng>>,
179    control_variate: Option<ControlVariate>,
180    base_seed: u64,
181}
182
183impl MonteCarloSampler {
184    /// Create a basic Monte Carlo sampler
185    pub fn new(seed: u64) -> Self {
186        Self {
187            antithetic: None,
188            control_variate: None,
189            base_seed: seed,
190        }
191    }
192
193    /// Create a sampler with antithetic variates for variance reduction
194    pub fn with_antithetic_variates(seed: u64) -> Self {
195        Self {
196            antithetic: Some(AntitheticSampling::new(seeded_rng(seed))),
197            control_variate: None,
198            base_seed: seed,
199        }
200    }
201
202    /// Create a sampler with control variates
203    pub fn with_control_variate(seed: u64, control_mean: f64) -> Self {
204        Self {
205            antithetic: None,
206            control_variate: Some(ControlVariate::new(control_mean)),
207            base_seed: seed,
208        }
209    }
210
211    /// Generate correlated sample pairs for variance reduction
212    pub fn generate_correlated_pairs(&mut self, count: usize) -> (Vec<f64>, Vec<f64>) {
213        if let Some(ref mut antithetic) = self.antithetic {
214            antithetic.generate_antithetic_pairs(count)
215        } else {
216            let mut rng = seeded_rng(self.base_seed);
217            let samples1: Vec<f64> = (0..count)
218                .map(|_| rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")))
219                .collect();
220            let samples2: Vec<f64> = (0..count)
221                .map(|_| rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")))
222                .collect();
223            (samples1, samples2)
224        }
225    }
226
227    /// Generate stratified samples for more uniform coverage
228    pub fn stratified_samples(&mut self, strata: usize, samples_per_stratum: usize) -> Vec<f64> {
229        if let Some(ref mut antithetic) = self.antithetic {
230            antithetic.stratified_samples(strata, samples_per_stratum)
231        } else {
232            let mut rng = seeded_rng(self.base_seed);
233            let mut all_samples = Vec::new();
234
235            for i in 0..strata {
236                let stratum_start = i as f64 / strata as f64;
237                let stratum_end = (i + 1) as f64 / strata as f64;
238
239                for _ in 0..samples_per_stratum {
240                    let u = rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed"));
241                    let sample = stratum_start + u * (stratum_end - stratum_start);
242                    all_samples.push(sample);
243                }
244            }
245
246            all_samples
247        }
248    }
249}
250
251/// Quasi-Monte Carlo sampling for better coverage
252pub struct QuasiMonteCarloSampler {
253    sobol: Option<SobolGenerator>,
254    halton: Option<HaltonGenerator>,
255    lhs: Option<LatinHypercubeSampler<rand::rngs::ThreadRng>>,
256    dimensions: usize,
257}
258
259impl QuasiMonteCarloSampler {
260    /// Create a Sobol sequence sampler
261    pub fn sobol(dimensions: usize) -> Result<Self, String> {
262        Ok(Self {
263            sobol: Some(SobolGenerator::dimension(dimensions).map_err(|e| e.to_string())?),
264            halton: None,
265            lhs: None,
266            dimensions,
267        })
268    }
269
270    /// Create a Halton sequence sampler
271    pub fn halton(dimensions: usize) -> Self {
272        Self {
273            sobol: None,
274            halton: Some(HaltonGenerator::new(
275                &(2u32..2u32 + dimensions as u32).collect::<Vec<_>>(),
276            )),
277            lhs: None,
278            dimensions,
279        }
280    }
281
282    /// Create a Latin Hypercube sampler
283    pub fn latin_hypercube(dimensions: usize) -> Self {
284        Self {
285            sobol: None,
286            halton: None,
287            lhs: Some(LatinHypercubeSampler::<rand::rngs::ThreadRng>::new(
288                dimensions,
289            )),
290            dimensions,
291        }
292    }
293
294    /// Generate quasi-random points
295    pub fn generate_points(&mut self, count: usize) -> Result<Vec<Vec<f64>>, String> {
296        if let Some(ref mut sobol) = self.sobol {
297            let array = sobol.generate(count);
298            let result: Vec<Vec<f64>> = array.rows().into_iter().map(|row| row.to_vec()).collect();
299            Ok(result)
300        } else if let Some(ref mut halton) = self.halton {
301            let array = halton.generate(count);
302            let result: Vec<Vec<f64>> = array.rows().into_iter().map(|row| row.to_vec()).collect();
303            Ok(result)
304        } else if let Some(ref mut lhs) = self.lhs {
305            let array = lhs.sample(count).map_err(|e| e.to_string())?;
306            Ok((0..count)
307                .map(|i| (0..self.dimensions).map(|j| array[[i, j]]).collect())
308                .collect())
309        } else {
310            Err("No QMC generator configured".to_string())
311        }
312    }
313}
314
315/// Design of experiments helper
316pub struct ExperimentalDesign;
317
318impl ExperimentalDesign {
319    /// Generate a factorial design
320    pub fn factorial_design(factors: &[Vec<f64>]) -> Vec<Vec<f64>> {
321        if factors.is_empty() {
322            return vec![vec![]];
323        }
324
325        let mut designs = vec![vec![]];
326
327        for factor_levels in factors {
328            let mut new_designs = Vec::new();
329            for design in &designs {
330                for &level in factor_levels {
331                    let mut new_design = design.clone();
332                    new_design.push(level);
333                    new_designs.push(new_design);
334                }
335            }
336            designs = new_designs;
337        }
338
339        designs
340    }
341
342    /// Generate a random fractional factorial design
343    pub fn fractional_factorial_design(
344        factors: &[Vec<f64>],
345        fraction: f64,
346        seed: u64,
347    ) -> Vec<Vec<f64>> {
348        let full_design = Self::factorial_design(factors);
349        let sample_size = (full_design.len() as f64 * fraction).ceil() as usize;
350
351        let mut rng = seeded_rng(seed);
352        use rand::seq::SliceRandom;
353        let mut sampled_design = full_design;
354        sampled_design.shuffle(&mut rng.rng);
355        sampled_design.truncate(sample_size);
356
357        sampled_design
358    }
359
360    /// Generate a central composite design
361    pub fn central_composite_design(dimensions: usize, alpha: f64) -> Vec<Vec<f64>> {
362        let mut design = Vec::new();
363
364        // Factorial points (corners of hypercube)
365        let factorial_factors: Vec<Vec<f64>> = (0..dimensions).map(|_| vec![-1.0, 1.0]).collect();
366        design.extend(Self::factorial_design(&factorial_factors));
367
368        // Axial points
369        for dim in 0..dimensions {
370            let mut point_pos = vec![0.0; dimensions];
371            let mut point_neg = vec![0.0; dimensions];
372            point_pos[dim] = alpha;
373            point_neg[dim] = -alpha;
374            design.push(point_pos);
375            design.push(point_neg);
376        }
377
378        // Center point
379        design.push(vec![0.0; dimensions]);
380
381        design
382    }
383}
384
385/// A/B testing utilities
386pub mod ab_testing {
387    use super::*;
388
389    /// Split data into A/B test groups
390    pub fn split_ab_groups<T: Clone>(data: &[T], split_ratio: f64, seed: u64) -> (Vec<T>, Vec<T>) {
391        let mut rng = seeded_rng(seed);
392
393        let mut group_a = Vec::new();
394        let mut group_b = Vec::new();
395
396        for item in data {
397            if rng.sample(Uniform::new(0.0, 1.0).expect("Operation failed")) < split_ratio {
398                group_a.push(item.clone());
399            } else {
400                group_b.push(item.clone());
401            }
402        }
403
404        (group_a, group_b)
405    }
406
407    /// Generate balanced A/B test assignments
408    pub fn balanced_assignment(user_ids: &[String], seed: u64) -> HashMap<String, String> {
409        let mut rng = seeded_rng(seed);
410        let mut assignments = HashMap::new();
411
412        let mut shuffled_ids = user_ids.to_vec();
413        use rand::seq::SliceRandom;
414        shuffled_ids.shuffle(&mut rng.rng);
415
416        for (i, user_id) in shuffled_ids.iter().enumerate() {
417            let group = if i % 2 == 0 { "A" } else { "B" };
418            assignments.insert(user_id.clone(), group.to_string());
419        }
420
421        assignments
422    }
423
424    /// Multi-arm bandit assignment with Thompson sampling
425    pub fn thompson_sampling_assignment(
426        arms: &[String],
427        successes: &[u32],
428        failures: &[u32],
429        seed: u64,
430    ) -> String {
431        use crate::random::distributions::Beta;
432
433        let mut rng = seeded_rng(seed);
434        let mut max_sample = 0.0;
435        let mut best_arm = arms[0].clone();
436
437        for (i, arm) in arms.iter().enumerate() {
438            let alpha = successes[i] as f64 + 1.0;
439            let beta_param = failures[i] as f64 + 1.0;
440
441            if let Ok(beta_dist) = Beta::new(alpha, beta_param) {
442                let sample = beta_dist.sample(&mut rng);
443                if sample > max_sample {
444                    max_sample = sample;
445                    best_arm = arm.clone();
446                }
447            }
448        }
449
450        best_arm
451    }
452}