pub fn random_sample(
n_samples: usize,
sample_size: usize,
replace: bool,
random_seed: Option<u64>,
) -> Result<Vec<usize>>
Expand description
Performs random sampling with or without replacement
This function creates random samples from a dataset using either bootstrap sampling (with replacement) or standard random sampling (without replacement).
§Arguments
- `n_samples` - Total number of samples in the dataset
- `sample_size` - Number of samples to draw
- `replace` - Whether to sample with replacement (bootstrap)
- `random_seed` - Optional random seed for reproducible sampling
§Returns
A `Result` containing a vector of `sample_size` indices, each less than `n_samples`, identifying the sampled data points
§Examples
use scirs2_datasets::utils::random_sample;
// Sample 5 indices from 10 total samples without replacement
let indices = random_sample(10, 5, false, Some(42)).unwrap();
assert_eq!(indices.len(), 5);
assert!(indices.iter().all(|&i| i < 10));
// Bootstrap sampling (with replacement)
let bootstrap_indices = random_sample(10, 15, true, Some(42)).unwrap();
assert_eq!(bootstrap_indices.len(), 15);
Examples found in repository?
examples/sampling_demo.rs (line 28)
9 fn main() {
10 println!("=== Sampling and Bootstrapping Demonstration ===\n");
11
12 // Load the Iris dataset for demonstration
13 let iris = load_iris().unwrap();
14 let n_samples = iris.n_samples();
15
16 println!("Original Iris dataset:");
17 println!("- Samples: {}", n_samples);
18 println!("- Features: {}", iris.n_features());
19
20 if let Some(target) = &iris.target {
21 let class_counts = count_classes(target);
22 println!("- Class distribution: {:?}\n", class_counts);
23 }
24
25 // Demonstrate random sampling without replacement
26 println!("=== Random Sampling (without replacement) ===");
27 let sample_size = 30;
28 let random_indices = random_sample(n_samples, sample_size, false, Some(42)).unwrap();
29
30 println!(
31 "Sampled {} indices from {} total samples",
32 sample_size, n_samples
33 );
34 println!(
35 "Sample indices: {:?}",
36 &random_indices[..10.min(random_indices.len())]
37 );
38
39 // Create a subset dataset
40 let sample_data = iris.data.select(ndarray::Axis(0), &random_indices);
41 let sample_target = iris
42 .target
43 .as_ref()
44 .map(|t| t.select(ndarray::Axis(0), &random_indices));
45 let sample_dataset = Dataset::new(sample_data, sample_target)
46 .with_description("Random sample from Iris dataset".to_string());
47
48 println!(
49 "Random sample dataset: {} samples, {} features",
50 sample_dataset.n_samples(),
51 sample_dataset.n_features()
52 );
53
54 if let Some(target) = &sample_dataset.target {
55 let sample_class_counts = count_classes(target);
56 println!("Sample class distribution: {:?}\n", sample_class_counts);
57 }
58
59 // Demonstrate bootstrap sampling (with replacement)
60 println!("=== Bootstrap Sampling (with replacement) ===");
61 let bootstrap_size = 200; // More than original dataset size
62 let bootstrap_indices = random_sample(n_samples, bootstrap_size, true, Some(42)).unwrap();
63
64 println!(
65 "Bootstrap sampled {} indices from {} total samples",
66 bootstrap_size, n_samples
67 );
68 println!(
69 "Bootstrap may have duplicates - first 10 indices: {:?}",
70 &bootstrap_indices[..10]
71 );
72
73 // Count frequency of each index in bootstrap sample
74 let mut index_counts = vec![0; n_samples];
75 for &idx in &bootstrap_indices {
76 index_counts[idx] += 1;
77 }
78 let max_count = *index_counts.iter().max().unwrap();
79 let zero_count = index_counts.iter().filter(|&&count| count == 0).count();
80
81 println!("Bootstrap statistics:");
82 println!("- Maximum frequency of any sample: {}", max_count);
83 println!(
84 "- Number of original samples not selected: {}\n",
85 zero_count
86 );
87
88 // Demonstrate stratified sampling
89 println!("=== Stratified Sampling ===");
90 if let Some(target) = &iris.target {
91 let stratified_size = 30;
92 let stratified_indices = stratified_sample(target, stratified_size, Some(42)).unwrap();
93
94 println!(
95 "Stratified sampled {} indices maintaining class proportions",
96 stratified_size
97 );
98
99 // Create stratified subset
100 let stratified_data = iris.data.select(ndarray::Axis(0), &stratified_indices);
101 let stratified_target = target.select(ndarray::Axis(0), &stratified_indices);
102 let stratified_dataset = Dataset::new(stratified_data, Some(stratified_target))
103 .with_description("Stratified sample from Iris dataset".to_string());
104
105 println!(
106 "Stratified sample dataset: {} samples, {} features",
107 stratified_dataset.n_samples(),
108 stratified_dataset.n_features()
109 );
110
111 let stratified_class_counts = count_classes(&stratified_dataset.target.unwrap());
112 println!(
113 "Stratified sample class distribution: {:?}",
114 stratified_class_counts
115 );
116
117 // Verify proportions are maintained
118 let original_proportions = calculate_proportions(&count_classes(target));
119 let stratified_proportions = calculate_proportions(&stratified_class_counts);
120
121 println!("Class proportion comparison:");
122 for (&class, &original_prop) in &original_proportions {
123 let stratified_prop = stratified_proportions.get(&class).unwrap_or(&0.0);
124 println!(
125 " Class {}: Original {:.2}%, Stratified {:.2}%",
126 class,
127 original_prop * 100.0,
128 stratified_prop * 100.0
129 );
130 }
131 }
132
133 // Demonstrate practical use case: creating training/validation splits
134 println!("\n=== Practical Example: Multiple Train/Validation Splits ===");
135 for i in 1..=3 {
136 let split_indices = random_sample(n_samples, 100, false, Some(42 + i)).unwrap();
137 let (train_indices, val_indices) = split_indices.split_at(80);
138
139 println!(
140 "Split {}: {} training samples, {} validation samples",
141 i,
142 train_indices.len(),
143 val_indices.len()
144 );
145 }
146
147 println!("\n=== Sampling Demo Complete ===");
148 }