pub fn random_sample(
n_samples: usize,
sample_size: usize,
replace: bool,
random_seed: Option<u64>,
) -> Result<Vec<usize>>
Performs random sampling with or without replacement
This function creates random samples from a dataset using either bootstrap sampling (with replacement) or standard random sampling (without replacement).
§Arguments
- `n_samples` - Total number of samples in the dataset
- `sample_size` - Number of samples to draw
- `replace` - Whether to sample with replacement (bootstrap)
- `random_seed` - Optional random seed for reproducible sampling
§Returns
A vector of indices representing the sampled data points
§Examples
use scirs2_datasets::utils::random_sample;
// Sample 5 indices from 10 total samples without replacement
let indices = random_sample(10, 5, false, Some(42)).unwrap();
assert_eq!(indices.len(), 5);
assert!(indices.iter().all(|&i| i < 10));
// Bootstrap sampling (with replacement)
let bootstrap_indices = random_sample(10, 15, true, Some(42)).unwrap();
assert_eq!(bootstrap_indices.len(), 15);

Examples found in repository?
examples/sampling_demo.rs (line 29)
10fn main() {
11 println!("=== Sampling and Bootstrapping Demonstration ===\n");
12
13 // Load the Iris dataset for demonstration
14 let iris = load_iris().unwrap();
15 let n_samples = iris.n_samples();
16
17 println!("Original Iris dataset:");
18 println!("- Samples: {n_samples}");
19 println!("- Features: {}", iris.n_features());
20
21 if let Some(target) = &iris.target {
22 let class_counts = count_classes(target);
23 println!("- Class distribution: {class_counts:?}\n");
24 }
25
26 // Demonstrate random sampling without replacement
27 println!("=== Random Sampling (without replacement) ===");
28 let samplesize = 30;
29 let random_indices = random_sample(n_samples, samplesize, false, Some(42)).unwrap();
30
31 println!("Sampled {samplesize} indices from {n_samples} total samples");
32 println!(
33 "Sample indices: {:?}",
34 &random_indices[..10.min(random_indices.len())]
35 );
36
37 // Create a subset dataset
38 let sampledata = iris.data.select(ndarray::Axis(0), &random_indices);
39 let sample_target = iris
40 .target
41 .as_ref()
42 .map(|t| t.select(ndarray::Axis(0), &random_indices));
43 let sampledataset = Dataset::new(sampledata, sample_target)
44 .with_description("Random sample from Iris dataset".to_string());
45
46 println!(
47 "Random sample dataset: {} samples, {} features",
48 sampledataset.n_samples(),
49 sampledataset.n_features()
50 );
51
52 if let Some(target) = &sampledataset.target {
53 let sample_class_counts = count_classes(target);
54 println!("Sample class distribution: {sample_class_counts:?}\n");
55 }
56
57 // Demonstrate bootstrap sampling (with replacement)
58 println!("=== Bootstrap Sampling (with replacement) ===");
59 let bootstrapsize = 200; // More than original dataset size
60 let bootstrap_indices = random_sample(n_samples, bootstrapsize, true, Some(42)).unwrap();
61
62 println!("Bootstrap sampled {bootstrapsize} indices from {n_samples} total samples");
63 println!(
64 "Bootstrap may have duplicates - first 10 indices: {:?}",
65 &bootstrap_indices[..10]
66 );
67
68 // Count frequency of each index in bootstrap sample
69 let mut index_counts = vec![0; n_samples];
70 for &idx in &bootstrap_indices {
71 index_counts[idx] += 1;
72 }
73 let max_count = *index_counts.iter().max().unwrap();
74 let zero_count = index_counts.iter().filter(|&&count| count == 0).count();
75
76 println!("Bootstrap statistics:");
77 println!("- Maximum frequency of any sample: {max_count}");
78 println!("- Number of original samples not selected: {zero_count}\n");
79
80 // Demonstrate stratified sampling
81 println!("=== Stratified Sampling ===");
82 if let Some(target) = &iris.target {
83 let stratifiedsize = 30;
84 let stratified_indices = stratified_sample(target, stratifiedsize, Some(42)).unwrap();
85
86 println!("Stratified sampled {stratifiedsize} indices maintaining class proportions");
87
88 // Create stratified subset
89 let stratifieddata = iris.data.select(ndarray::Axis(0), &stratified_indices);
90 let stratified_target = target.select(ndarray::Axis(0), &stratified_indices);
91 let stratifieddataset = Dataset::new(stratifieddata, Some(stratified_target))
92 .with_description("Stratified sample from Iris dataset".to_string());
93
94 println!(
95 "Stratified sample dataset: {} samples, {} features",
96 stratifieddataset.n_samples(),
97 stratifieddataset.n_features()
98 );
99
100 let stratified_class_counts = count_classes(&stratifieddataset.target.unwrap());
101 println!("Stratified sample class distribution: {stratified_class_counts:?}");
102
103 // Verify proportions are maintained
104 let original_proportions = calculate_proportions(&count_classes(target));
105 let stratified_proportions = calculate_proportions(&stratified_class_counts);
106
107 println!("Class proportion comparison:");
108 for (&class, &original_prop) in &original_proportions {
109 let stratified_prop = stratified_proportions.get(&class).unwrap_or(&0.0);
110 println!(
111 " Class {}: Original {:.2}%, Stratified {:.2}%",
112 class,
113 original_prop * 100.0,
114 stratified_prop * 100.0
115 );
116 }
117 }
118
119 // Demonstrate practical use case: creating training/validation splits
120 println!("\n=== Practical Example: Multiple Train/Validation Splits ===");
121 for i in 1..=3 {
122 let split_indices = random_sample(n_samples, 100, false, Some(42 + i)).unwrap();
123 let (train_indices, val_indices) = split_indices.split_at(80);
124
125 println!(
126 "Split {}: {} training samples, {} validation samples",
127 i,
128 train_indices.len(),
129 val_indices.len()
130 );
131 }
132
133 println!("\n=== Sampling Demo Complete ===");
134}