pub fn time_series_split(
n_samples: usize,
n_splits: usize,
n_test_samples: usize,
gap: usize,
) -> Result<CrossValidationFolds>
Expand description
Performs time series cross-validation splitting.
Creates splits suitable for time series data where future observations
should not be used to predict past observations. Each training set contains
all observations up to a certain point, and the validation set contains
the next n_test_samples observations.
§Arguments
- n_samples - Number of samples in the dataset
- n_splits - Number of splits to create
- n_test_samples - Number of samples in each test set
- gap - Number of samples to skip between train and test sets (pass 0 for no gap)
§Returns
On success, a vector of (train_indices, validation_indices) tuples, one per split, wrapped in a Result
§Examples
use scirs2_datasets::utils::time_series_split;
let folds = time_series_split(100, 5, 10, 0).unwrap();
assert_eq!(folds.len(), 5);
// Training sets should be increasing in size
for i in 1..folds.len() {
    assert!(folds[i].0.len() > folds[i - 1].0.len());
}
Examples found in repository?
examples/datasets_cross_validation_demo.rs (line 72)
10fn main() {
11 println!("=== Cross-Validation Demonstration ===\n");
12
13 // Create sample dataset
14 let data = Array2::from_shape_vec((20, 3), (0..60).map(|x| x as f64 / 10.0).collect()).unwrap();
15 let target = Array1::from(
16 (0..20)
17 .map(|i| if i % 2 == 0 { 0.0 } else { 1.0 })
18 .collect::<Vec<_>>(),
19 );
20
21 let dataset = Dataset::new(data.clone(), Some(target.clone()))
22 .with_description("Sample dataset for cross-validation demo".to_string());
23
24 println!("Dataset info:");
25 println!("- Samples: {}", dataset.n_samples());
26 println!("- Features: {}", dataset.n_features());
27 println!("- Description: {}\n", dataset.description.as_ref().unwrap());
28
29 // Demonstrate K-fold cross-validation
30 println!("=== K-Fold Cross-Validation (k=5) ===");
31 let k_folds = k_fold_split(dataset.n_samples(), 5, true, Some(42)).unwrap();
32
33 for (i, (train_indices, val_indices)) in k_folds.iter().enumerate() {
34 println!(
35 "Fold {}: Train, size: {}, Validation size: {}",
36 i + 1,
37 train_indices.len(),
38 val_indices.len()
39 );
40 println!(
41 " Train indices: {:?}",
42 &train_indices[..5.min(train_indices.len())]
43 );
44 println!(" Val indices: {val_indices:?}");
45 }
46 println!();
47
48 // Demonstrate Stratified K-fold cross-validation
49 println!("=== Stratified K-Fold Cross-Validation (k=4) ===");
50 let stratified_folds = stratified_k_fold_split(&target, 4, true, Some(42)).unwrap();
51
52 for (i, (train_indices, val_indices)) in stratified_folds.iter().enumerate() {
53 // Calculate class distribution in validation set
54 let val_targets: Vec<f64> = val_indices.iter().map(|&idx| target[idx]).collect();
55 let class_0_count = val_targets.iter().filter(|&&x| x == 0.0).count();
56 let class_1_count = val_targets.iter().filter(|&&x| x == 1.0).count();
57
58 println!(
59 "Fold {}: Train, size: {}, Validation size: {}",
60 i + 1,
61 train_indices.len(),
62 val_indices.len()
63 );
64 println!(
65 " Class distribution in validation: Class 0: {class_0_count}, Class 1: {class_1_count}"
66 );
67 }
68 println!();
69
70 // Demonstrate Time Series cross-validation
71 println!("=== Time Series Cross-Validation ===");
72 let ts_folds = time_series_split(dataset.n_samples(), 3, 3, 1).unwrap();
73
74 for (i, (train_indices, val_indices)) in ts_folds.iter().enumerate() {
75 println!(
76 "Split {}: Train, size: {}, Test size: {}",
77 i + 1,
78 train_indices.len(),
79 val_indices.len()
80 );
81 println!(
82 " Train range: {} to {}",
83 train_indices.first().unwrap_or(&0),
84 train_indices.last().unwrap_or(&0)
85 );
86 println!(
87 " Test range: {} to {}",
88 val_indices.first().unwrap_or(&0),
89 val_indices.last().unwrap_or(&0)
90 );
91 }
92 println!();
93
94 // Demonstrate usage with Dataset methods
95 println!("=== Using Cross-Validation with Dataset ===");
96 let first_fold = &k_folds[0];
97 let (train_indices, val_indices) = first_fold;
98
99 // Create training subset
100 let traindata = data.select(ndarray::Axis(0), train_indices);
101 let train_target = target.select(ndarray::Axis(0), train_indices);
102 let traindataset = Dataset::new(traindata, Some(train_target))
103 .with_description("Training fold from K-fold CV".to_string());
104
105 // Create validation subset
106 let valdata = data.select(ndarray::Axis(0), val_indices);
107 let val_target = target.select(ndarray::Axis(0), val_indices);
108 let valdataset = Dataset::new(valdata, Some(val_target))
109 .with_description("Validation fold from K-fold CV".to_string());
110
111 println!(
112 "Training dataset: {} samples, {} features",
113 traindataset.n_samples(),
114 traindataset.n_features()
115 );
116 println!(
117 "Validation dataset: {} samples, {} features",
118 valdataset.n_samples(),
119 valdataset.n_features()
120 );
121
122 println!("\n=== Cross-Validation Demo Complete ===");
123}