pub struct Dataset {
pub data: Array2<f64>,
pub target: Option<Array1<f64>>,
pub targetnames: Option<Vec<String>>,
pub featurenames: Option<Vec<String>>,
pub feature_descriptions: Option<Vec<String>>,
pub description: Option<String>,
pub metadata: HashMap<String, String>,
}

Expand description
Represents a dataset with features, optional targets, and metadata
The Dataset struct is the core data structure for managing machine learning datasets. It stores the feature matrix, optional target values, and rich metadata including feature names, descriptions, and arbitrary key-value pairs.
§Examples
use scirs2_core::ndarray::Array2;
use scirs2_datasets::utils::Dataset;
let data = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
let dataset = Dataset::new(data, None)
.with_featurenames(vec!["feature1".to_string(), "feature2".to_string()])
.with_description("Sample dataset".to_string());
assert_eq!(dataset.n_samples(), 3);
assert_eq!(dataset.n_features(), 2);

Fields§
§data: Array2<f64>Features/data matrix (n_samples, n_features)
target: Option<Array1<f64>>Optional target values
targetnames: Option<Vec<String>>Optional target names for classification problems
featurenames: Option<Vec<String>>Optional feature names
feature_descriptions: Option<Vec<String>>Optional descriptions for each feature
description: Option<String>Optional dataset description
metadata: HashMap<String, String> — Dataset metadata as arbitrary key-value pairs (always present; may be empty)
Implementations§
Source§impl Dataset
impl Dataset
Source
pub fn new(data: Array2<f64>, target: Option<Array1<f64>>) -> Self
pub fn new(data: Array2<f64>, target: Option<Array1<f64>>) -> Self
Create a new dataset with the given data and target
§Arguments
data - The feature matrix (n_samples, n_features)
target - Optional target values (n_samples,)
§Returns
A new Dataset instance with empty metadata
§Examples
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_datasets::utils::Dataset;
let data = Array2::zeros((100, 5));
let target = Some(Array1::zeros(100));
let dataset = Dataset::new(data, target);

Examples found in repository
10fn main() {
11 println!("=== Cross-Validation Demonstration ===\n");
12
13 // Create sample dataset
14 let data = Array2::from_shape_vec((20, 3), (0..60).map(|x| x as f64 / 10.0).collect()).unwrap();
15 let target = Array1::from(
16 (0..20)
17 .map(|i| if i % 2 == 0 { 0.0 } else { 1.0 })
18 .collect::<Vec<_>>(),
19 );
20
21 let dataset = Dataset::new(data.clone(), Some(target.clone()))
22 .with_description("Sample dataset for cross-validation demo".to_string());
23
24 println!("Dataset info:");
25 println!("- Samples: {}", dataset.n_samples());
26 println!("- Features: {}", dataset.n_features());
27 println!("- Description: {}\n", dataset.description.as_ref().unwrap());
28
29 // Demonstrate K-fold cross-validation
30 println!("=== K-Fold Cross-Validation (k=5) ===");
31 let k_folds = k_fold_split(dataset.n_samples(), 5, true, Some(42)).unwrap();
32
33 for (i, (train_indices, val_indices)) in k_folds.iter().enumerate() {
34 println!(
35 "Fold {}: Train, size: {}, Validation size: {}",
36 i + 1,
37 train_indices.len(),
38 val_indices.len()
39 );
40 println!(
41 " Train indices: {:?}",
42 &train_indices[..5.min(train_indices.len())]
43 );
44 println!(" Val indices: {val_indices:?}");
45 }
46 println!();
47
48 // Demonstrate Stratified K-fold cross-validation
49 println!("=== Stratified K-Fold Cross-Validation (k=4) ===");
50 let stratified_folds = stratified_k_fold_split(&target, 4, true, Some(42)).unwrap();
51
52 for (i, (train_indices, val_indices)) in stratified_folds.iter().enumerate() {
53 // Calculate class distribution in validation set
54 let val_targets: Vec<f64> = val_indices.iter().map(|&idx| target[idx]).collect();
55 let class_0_count = val_targets.iter().filter(|&&x| x == 0.0).count();
56 let class_1_count = val_targets.iter().filter(|&&x| x == 1.0).count();
57
58 println!(
59 "Fold {}: Train, size: {}, Validation size: {}",
60 i + 1,
61 train_indices.len(),
62 val_indices.len()
63 );
64 println!(
65 " Class distribution in validation: Class 0: {class_0_count}, Class 1: {class_1_count}"
66 );
67 }
68 println!();
69
70 // Demonstrate Time Series cross-validation
71 println!("=== Time Series Cross-Validation ===");
72 let ts_folds = time_series_split(dataset.n_samples(), 3, 3, 1).unwrap();
73
74 for (i, (train_indices, val_indices)) in ts_folds.iter().enumerate() {
75 println!(
76 "Split {}: Train, size: {}, Test size: {}",
77 i + 1,
78 train_indices.len(),
79 val_indices.len()
80 );
81 println!(
82 " Train range: {} to {}",
83 train_indices.first().unwrap_or(&0),
84 train_indices.last().unwrap_or(&0)
85 );
86 println!(
87 " Test range: {} to {}",
88 val_indices.first().unwrap_or(&0),
89 val_indices.last().unwrap_or(&0)
90 );
91 }
92 println!();
93
94 // Demonstrate usage with Dataset methods
95 println!("=== Using Cross-Validation with Dataset ===");
96 let first_fold = &k_folds[0];
97 let (train_indices, val_indices) = first_fold;
98
99 // Create training subset
100 let traindata = data.select(scirs2_core::ndarray::Axis(0), train_indices);
101 let train_target = target.select(scirs2_core::ndarray::Axis(0), train_indices);
102 let traindataset = Dataset::new(traindata, Some(train_target))
103 .with_description("Training fold from K-fold CV".to_string());
104
105 // Create validation subset
106 let valdata = data.select(scirs2_core::ndarray::Axis(0), val_indices);
107 let val_target = target.select(scirs2_core::ndarray::Axis(0), val_indices);
108 let valdataset = Dataset::new(valdata, Some(val_target))
109 .with_description("Validation fold from K-fold CV".to_string());
110
111 println!(
112 "Training dataset: {} samples, {} features",
113 traindataset.n_samples(),
114 traindataset.n_features()
115 );
116 println!(
117 "Validation dataset: {} samples, {} features",
118 valdataset.n_samples(),
119 valdataset.n_features()
120 );
121
122 println!("\n=== Cross-Validation Demo Complete ===");
123}

More examples
10fn main() {
11 println!("=== Sampling and Bootstrapping Demonstration ===\n");
12
13 // Load the Iris dataset for demonstration
14 let iris = load_iris().unwrap();
15 let n_samples = iris.n_samples();
16
17 println!("Original Iris dataset:");
18 println!("- Samples: {n_samples}");
19 println!("- Features: {}", iris.n_features());
20
21 if let Some(target) = &iris.target {
22 let class_counts = count_classes(target);
23 println!("- Class distribution: {class_counts:?}\n");
24 }
25
26 // Demonstrate random sampling without replacement
27 println!("=== Random Sampling (without replacement) ===");
28 let samplesize = 30;
29 let random_indices = random_sample(n_samples, samplesize, false, Some(42)).unwrap();
30
31 println!("Sampled {samplesize} indices from {n_samples} total samples");
32 println!(
33 "Sample indices: {:?}",
34 &random_indices[..10.min(random_indices.len())]
35 );
36
37 // Create a subset dataset
38 let sampledata = iris
39 .data
40 .select(scirs2_core::ndarray::Axis(0), &random_indices);
41 let sample_target = iris
42 .target
43 .as_ref()
44 .map(|t| t.select(scirs2_core::ndarray::Axis(0), &random_indices));
45 let sampledataset = Dataset::new(sampledata, sample_target)
46 .with_description("Random sample from Iris dataset".to_string());
47
48 println!(
49 "Random sample dataset: {} samples, {} features",
50 sampledataset.n_samples(),
51 sampledataset.n_features()
52 );
53
54 if let Some(target) = &sampledataset.target {
55 let sample_class_counts = count_classes(target);
56 println!("Sample class distribution: {sample_class_counts:?}\n");
57 }
58
59 // Demonstrate bootstrap sampling (with replacement)
60 println!("=== Bootstrap Sampling (with replacement) ===");
61 let bootstrapsize = 200; // More than original dataset size
62 let bootstrap_indices = random_sample(n_samples, bootstrapsize, true, Some(42)).unwrap();
63
64 println!("Bootstrap sampled {bootstrapsize} indices from {n_samples} total samples");
65 println!(
66 "Bootstrap may have duplicates - first 10 indices: {:?}",
67 &bootstrap_indices[..10]
68 );
69
70 // Count frequency of each index in bootstrap sample
71 let mut index_counts = vec![0; n_samples];
72 for &idx in &bootstrap_indices {
73 index_counts[idx] += 1;
74 }
75 let max_count = *index_counts.iter().max().unwrap();
76 let zero_count = index_counts.iter().filter(|&&count| count == 0).count();
77
78 println!("Bootstrap statistics:");
79 println!("- Maximum frequency of any sample: {max_count}");
80 println!("- Number of original samples not selected: {zero_count}\n");
81
82 // Demonstrate stratified sampling
83 println!("=== Stratified Sampling ===");
84 if let Some(target) = &iris.target {
85 let stratifiedsize = 30;
86 let stratified_indices = stratified_sample(target, stratifiedsize, Some(42)).unwrap();
87
88 println!("Stratified sampled {stratifiedsize} indices maintaining class proportions");
89
90 // Create stratified subset
91 let stratifieddata = iris
92 .data
93 .select(scirs2_core::ndarray::Axis(0), &stratified_indices);
94 let stratified_target = target.select(scirs2_core::ndarray::Axis(0), &stratified_indices);
95 let stratifieddataset = Dataset::new(stratifieddata, Some(stratified_target))
96 .with_description("Stratified sample from Iris dataset".to_string());
97
98 println!(
99 "Stratified sample dataset: {} samples, {} features",
100 stratifieddataset.n_samples(),
101 stratifieddataset.n_features()
102 );
103
104 let stratified_class_counts = count_classes(&stratifieddataset.target.unwrap());
105 println!("Stratified sample class distribution: {stratified_class_counts:?}");
106
107 // Verify proportions are maintained
108 let original_proportions = calculate_proportions(&count_classes(target));
109 let stratified_proportions = calculate_proportions(&stratified_class_counts);
110
111 println!("Class proportion comparison:");
112 for (&class, &original_prop) in &original_proportions {
113 let stratified_prop = stratified_proportions.get(&class).unwrap_or(&0.0);
114 println!(
115 " Class {}: Original {:.2}%, Stratified {:.2}%",
116 class,
117 original_prop * 100.0,
118 stratified_prop * 100.0
119 );
120 }
121 }
122
123 // Demonstrate practical use case: creating training/validation splits
124 println!("\n=== Practical Example: Multiple Train/Validation Splits ===");
125 for i in 1..=3 {
126 let split_indices = random_sample(n_samples, 100, false, Some(42 + i)).unwrap();
127 let (train_indices, val_indices) = split_indices.split_at(80);
128
129 println!(
130 "Split {}: {} training samples, {} validation samples",
131 i,
132 train_indices.len(),
133 val_indices.len()
134 );
135 }
136
137 println!("\n=== Sampling Demo Complete ===");
138}

Source
pub fn from_metadata(
data: Array2<f64>,
target: Option<Array1<f64>>,
metadata: DatasetMetadata,
) -> Self
pub fn from_metadata( data: Array2<f64>, target: Option<Array1<f64>>, metadata: DatasetMetadata, ) -> Self
Source
pub fn with_targetnames(self, targetnames: Vec<String>) -> Self
pub fn with_targetnames(self, targetnames: Vec<String>) -> Self
Source
pub fn with_featurenames(self, featurenames: Vec<String>) -> Self
pub fn with_featurenames(self, featurenames: Vec<String>) -> Self
Source
pub fn with_feature_descriptions(self, featuredescriptions: Vec<String>) -> Self
pub fn with_feature_descriptions(self, featuredescriptions: Vec<String>) -> Self
Source
pub fn with_description(self, description: String) -> Self
pub fn with_description(self, description: String) -> Self
Add a description to the dataset (builder pattern)
§Arguments
description- Dataset description
§Returns
Self for method chaining
Examples found in repository?
10fn main() {
11 println!("=== Cross-Validation Demonstration ===\n");
12
13 // Create sample dataset
14 let data = Array2::from_shape_vec((20, 3), (0..60).map(|x| x as f64 / 10.0).collect()).unwrap();
15 let target = Array1::from(
16 (0..20)
17 .map(|i| if i % 2 == 0 { 0.0 } else { 1.0 })
18 .collect::<Vec<_>>(),
19 );
20
21 let dataset = Dataset::new(data.clone(), Some(target.clone()))
22 .with_description("Sample dataset for cross-validation demo".to_string());
23
24 println!("Dataset info:");
25 println!("- Samples: {}", dataset.n_samples());
26 println!("- Features: {}", dataset.n_features());
27 println!("- Description: {}\n", dataset.description.as_ref().unwrap());
28
29 // Demonstrate K-fold cross-validation
30 println!("=== K-Fold Cross-Validation (k=5) ===");
31 let k_folds = k_fold_split(dataset.n_samples(), 5, true, Some(42)).unwrap();
32
33 for (i, (train_indices, val_indices)) in k_folds.iter().enumerate() {
34 println!(
35 "Fold {}: Train, size: {}, Validation size: {}",
36 i + 1,
37 train_indices.len(),
38 val_indices.len()
39 );
40 println!(
41 " Train indices: {:?}",
42 &train_indices[..5.min(train_indices.len())]
43 );
44 println!(" Val indices: {val_indices:?}");
45 }
46 println!();
47
48 // Demonstrate Stratified K-fold cross-validation
49 println!("=== Stratified K-Fold Cross-Validation (k=4) ===");
50 let stratified_folds = stratified_k_fold_split(&target, 4, true, Some(42)).unwrap();
51
52 for (i, (train_indices, val_indices)) in stratified_folds.iter().enumerate() {
53 // Calculate class distribution in validation set
54 let val_targets: Vec<f64> = val_indices.iter().map(|&idx| target[idx]).collect();
55 let class_0_count = val_targets.iter().filter(|&&x| x == 0.0).count();
56 let class_1_count = val_targets.iter().filter(|&&x| x == 1.0).count();
57
58 println!(
59 "Fold {}: Train, size: {}, Validation size: {}",
60 i + 1,
61 train_indices.len(),
62 val_indices.len()
63 );
64 println!(
65 " Class distribution in validation: Class 0: {class_0_count}, Class 1: {class_1_count}"
66 );
67 }
68 println!();
69
70 // Demonstrate Time Series cross-validation
71 println!("=== Time Series Cross-Validation ===");
72 let ts_folds = time_series_split(dataset.n_samples(), 3, 3, 1).unwrap();
73
74 for (i, (train_indices, val_indices)) in ts_folds.iter().enumerate() {
75 println!(
76 "Split {}: Train, size: {}, Test size: {}",
77 i + 1,
78 train_indices.len(),
79 val_indices.len()
80 );
81 println!(
82 " Train range: {} to {}",
83 train_indices.first().unwrap_or(&0),
84 train_indices.last().unwrap_or(&0)
85 );
86 println!(
87 " Test range: {} to {}",
88 val_indices.first().unwrap_or(&0),
89 val_indices.last().unwrap_or(&0)
90 );
91 }
92 println!();
93
94 // Demonstrate usage with Dataset methods
95 println!("=== Using Cross-Validation with Dataset ===");
96 let first_fold = &k_folds[0];
97 let (train_indices, val_indices) = first_fold;
98
99 // Create training subset
100 let traindata = data.select(scirs2_core::ndarray::Axis(0), train_indices);
101 let train_target = target.select(scirs2_core::ndarray::Axis(0), train_indices);
102 let traindataset = Dataset::new(traindata, Some(train_target))
103 .with_description("Training fold from K-fold CV".to_string());
104
105 // Create validation subset
106 let valdata = data.select(scirs2_core::ndarray::Axis(0), val_indices);
107 let val_target = target.select(scirs2_core::ndarray::Axis(0), val_indices);
108 let valdataset = Dataset::new(valdata, Some(val_target))
109 .with_description("Validation fold from K-fold CV".to_string());
110
111 println!(
112 "Training dataset: {} samples, {} features",
113 traindataset.n_samples(),
114 traindataset.n_features()
115 );
116 println!(
117 "Validation dataset: {} samples, {} features",
118 valdataset.n_samples(),
119 valdataset.n_features()
120 );
121
122 println!("\n=== Cross-Validation Demo Complete ===");
123}

More examples
10fn main() {
11 println!("=== Sampling and Bootstrapping Demonstration ===\n");
12
13 // Load the Iris dataset for demonstration
14 let iris = load_iris().unwrap();
15 let n_samples = iris.n_samples();
16
17 println!("Original Iris dataset:");
18 println!("- Samples: {n_samples}");
19 println!("- Features: {}", iris.n_features());
20
21 if let Some(target) = &iris.target {
22 let class_counts = count_classes(target);
23 println!("- Class distribution: {class_counts:?}\n");
24 }
25
26 // Demonstrate random sampling without replacement
27 println!("=== Random Sampling (without replacement) ===");
28 let samplesize = 30;
29 let random_indices = random_sample(n_samples, samplesize, false, Some(42)).unwrap();
30
31 println!("Sampled {samplesize} indices from {n_samples} total samples");
32 println!(
33 "Sample indices: {:?}",
34 &random_indices[..10.min(random_indices.len())]
35 );
36
37 // Create a subset dataset
38 let sampledata = iris
39 .data
40 .select(scirs2_core::ndarray::Axis(0), &random_indices);
41 let sample_target = iris
42 .target
43 .as_ref()
44 .map(|t| t.select(scirs2_core::ndarray::Axis(0), &random_indices));
45 let sampledataset = Dataset::new(sampledata, sample_target)
46 .with_description("Random sample from Iris dataset".to_string());
47
48 println!(
49 "Random sample dataset: {} samples, {} features",
50 sampledataset.n_samples(),
51 sampledataset.n_features()
52 );
53
54 if let Some(target) = &sampledataset.target {
55 let sample_class_counts = count_classes(target);
56 println!("Sample class distribution: {sample_class_counts:?}\n");
57 }
58
59 // Demonstrate bootstrap sampling (with replacement)
60 println!("=== Bootstrap Sampling (with replacement) ===");
61 let bootstrapsize = 200; // More than original dataset size
62 let bootstrap_indices = random_sample(n_samples, bootstrapsize, true, Some(42)).unwrap();
63
64 println!("Bootstrap sampled {bootstrapsize} indices from {n_samples} total samples");
65 println!(
66 "Bootstrap may have duplicates - first 10 indices: {:?}",
67 &bootstrap_indices[..10]
68 );
69
70 // Count frequency of each index in bootstrap sample
71 let mut index_counts = vec![0; n_samples];
72 for &idx in &bootstrap_indices {
73 index_counts[idx] += 1;
74 }
75 let max_count = *index_counts.iter().max().unwrap();
76 let zero_count = index_counts.iter().filter(|&&count| count == 0).count();
77
78 println!("Bootstrap statistics:");
79 println!("- Maximum frequency of any sample: {max_count}");
80 println!("- Number of original samples not selected: {zero_count}\n");
81
82 // Demonstrate stratified sampling
83 println!("=== Stratified Sampling ===");
84 if let Some(target) = &iris.target {
85 let stratifiedsize = 30;
86 let stratified_indices = stratified_sample(target, stratifiedsize, Some(42)).unwrap();
87
88 println!("Stratified sampled {stratifiedsize} indices maintaining class proportions");
89
90 // Create stratified subset
91 let stratifieddata = iris
92 .data
93 .select(scirs2_core::ndarray::Axis(0), &stratified_indices);
94 let stratified_target = target.select(scirs2_core::ndarray::Axis(0), &stratified_indices);
95 let stratifieddataset = Dataset::new(stratifieddata, Some(stratified_target))
96 .with_description("Stratified sample from Iris dataset".to_string());
97
98 println!(
99 "Stratified sample dataset: {} samples, {} features",
100 stratifieddataset.n_samples(),
101 stratifieddataset.n_features()
102 );
103
104 let stratified_class_counts = count_classes(&stratifieddataset.target.unwrap());
105 println!("Stratified sample class distribution: {stratified_class_counts:?}");
106
107 // Verify proportions are maintained
108 let original_proportions = calculate_proportions(&count_classes(target));
109 let stratified_proportions = calculate_proportions(&stratified_class_counts);
110
111 println!("Class proportion comparison:");
112 for (&class, &original_prop) in &original_proportions {
113 let stratified_prop = stratified_proportions.get(&class).unwrap_or(&0.0);
114 println!(
115 " Class {}: Original {:.2}%, Stratified {:.2}%",
116 class,
117 original_prop * 100.0,
118 stratified_prop * 100.0
119 );
120 }
121 }
122
123 // Demonstrate practical use case: creating training/validation splits
124 println!("\n=== Practical Example: Multiple Train/Validation Splits ===");
125 for i in 1..=3 {
126 let split_indices = random_sample(n_samples, 100, false, Some(42 + i)).unwrap();
127 let (train_indices, val_indices) = split_indices.split_at(80);
128
129 println!(
130 "Split {}: {} training samples, {} validation samples",
131 i,
132 train_indices.len(),
133 val_indices.len()
134 );
135 }
136
137 println!("\n=== Sampling Demo Complete ===");
138}

Source
pub fn with_metadata(self, key: &str, value: &str) -> Self
pub fn with_metadata(self, key: &str, value: &str) -> Self
Source
pub fn n_samples(&self) -> usize
pub fn n_samples(&self) -> usize
Examples found in repository?
4fn main() -> Result<(), Box<dyn std::error::Error>> {
5 let iris = load_iris()?;
6 println!("Iris dataset loaded:");
7 println!(" Samples: {}", iris.n_samples());
8 println!(" Features: {}", iris.n_features());
9 println!(
10 " Target classes: {}",
11 iris.targetnames.as_ref().map_or(0, |v| v.len())
12 );
13
14 let boston = load_boston()?;
15 println!("\nBoston Housing dataset loaded:");
16 println!(" Samples: {}", boston.n_samples());
17 println!(" Features: {}", boston.n_features());
18
19 Ok(())
20}

More examples
30fn main() -> Result<(), Box<dyn std::error::Error>> {
31 println!("🚀 SciRS2-Datasets Advanced Mode Showcase");
32 println!("===========================================\n");
33
34 // Create a sample dataset for demonstration
35 let dataset = create_sampledataset()?;
36 println!(
37 "📊 Created sample dataset: {} samples, {} features",
38 dataset.n_samples(),
39 dataset.n_features()
40 );
41
42 // Demonstrate advanced analytics
43 demonstrate_advanced_analytics(&dataset)?;
44
45 // Demonstrate advanced-GPU optimization
46 demonstrate_advanced_gpu_optimization()?;
47
48 // Demonstrate adaptive streaming
49 demonstrate_adaptive_streaming(&dataset)?;
50
51 println!("\n✅ Advanced mode demonstration completed successfully!");
52 Ok(())
53}

4fn main() -> Result<(), Box<dyn std::error::Error>> {
5 // Load a CSV file with headers and target column
6 let dataset = load_csv_legacy(
7 "scirs2-datasets/data/example.csv",
8 true, // has header
9 Some(3), // target column index (0-based)
10 )?;
11
12 println!("CSV dataset loaded successfully:");
13 println!(" Samples: {}", dataset.n_samples());
14 println!(" Features: {}", dataset.n_features());
15 println!(" Feature names: {:?}", dataset.featurenames);
16
17 // Access data and target
18 println!("\nFirst 3 samples:");
19 for i in 0..3 {
20 let features = dataset.data.row(i);
21 let target = dataset.target.as_ref().map(|t| t[i]);
22 println!(" Sample {i}: Features = {features:?}, Target = {target:?}");
23 }
24
25 Ok(())
26}

196fn demonstrate_comprehensive_corruption() {
197 println!("Testing comprehensive dataset corruption:");
198
199 // Load a real dataset
200 let iris = load_iris().unwrap();
201 println!(
202 "Original Iris dataset: {} samples, {} features",
203 iris.n_samples(),
204 iris.n_features()
205 );
206
207 let original_stats = calculate_basic_stats(&iris.data);
208 println!(
209 "Original stats - Mean: {:.3}, Std: {:.3}",
210 original_stats.0, original_stats.1
211 );
212
213 // Create different levels of corruption
214 let corruption_levels = [
215 (0.05, 0.02, "Light corruption"),
216 (0.1, 0.05, "Moderate corruption"),
217 (0.2, 0.1, "Heavy corruption"),
218 (0.3, 0.15, "Severe corruption"),
219 ];
220
221 for (missing_rate, outlier_rate, description) in corruption_levels {
222 let corrupted = make_corrupted_dataset(
223 &iris,
224 missing_rate,
225 MissingPattern::MAR, // More realistic than MCAR
226 outlier_rate,
227 OutlierType::Point,
228 2.5,
229 Some(42),
230 )
231 .unwrap();
232
233 // Calculate how much data is usable
234 let total_elements = corrupted.data.len();
235 let missing_elements = corrupted.data.iter().filter(|&&x| x.is_nan()).count();
236 let usable_percentage =
237 ((total_elements - missing_elements) as f64 / total_elements as f64) * 100.0;
238
239 println!("{description}:");
240 println!(" Missing data: {:.1}%", missing_rate * 100.0);
241 println!(" Outliers: {:.1}%", outlier_rate * 100.0);
242 println!(" Usable data: {:.1}%", usable_percentage);
243
244 // Show metadata
245 if let Some(missing_count) = corrupted.metadata.get("missing_count") {
246 println!(" Actual missing: {missing_count} elements");
247 }
248 if let Some(outlier_count) = corrupted.metadata.get("outlier_count") {
249 println!(" Actual outliers: {outlier_count} samples");
250 }
251 }
252}

5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("Loading time series datasets...\n");
7
8 // Load the electrocardiogram dataset
9 let ecg = electrocardiogram()?;
10
11 println!("Electrocardiogram dataset:");
12 println!(" Time steps: {}", ecg.n_samples());
13 println!(" Features: {}", ecg.n_features());
14 println!(
15 " Sampling rate: {} Hz",
16 ecg.metadata
17 .get("sampling_rate")
18 .unwrap_or(&"unknown".to_string())
19 );
20 println!(
21 " Duration: {}",
22 ecg.metadata
23 .get("duration")
24 .unwrap_or(&"unknown".to_string())
25 );
26
27 // Get a slice of the data and display basic statistics
28 let ecg_slice = ecg.data.slice(s![0..10, 0]);
29 println!(" First 10 data points: {ecg_slice:?}");
30
31 // Calculate some basic statistics
32 let ecgdata = ecg.data.column(0);
33 let min = ecgdata.fold(f64::INFINITY, |a, &b| a.min(b));
34 let max = ecgdata.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
35 let mean = ecgdata.sum() / ecgdata.len() as f64;
36
37 println!(" Min: {min:.3} mV");
38 println!(" Max: {max:.3} mV");
39 println!(" Mean: {mean:.3} mV");
40
41 // Note: Stock market and weather datasets are commented out because their source data
42 // is not yet available.
43
44 /*
45 // Load the stock market dataset
46 println!("\nStock market dataset:");
47
48 // Get price changes (returns)
49 let stock_returns = stock_market(true)?;
50 println!(" Time steps: {}", stock_returns.n_samples());
51 println!(" Companies: {}", stock_returns.n_features());
52
53 // Print companies
54 if let Some(featurenames) = &stock_returns.featurenames {
55 println!(" Companies: {}", featurenames.join(", "));
56 }
57
58 // Load the weather dataset
59 println!("\nWeather dataset:");
60 let tempdata = weather(Some("temperature"))?;
61
62 println!(" Time steps: {}", tempdata.n_samples());
63 println!(" Locations: {}", tempdata.n_features());
64 */
65
66 println!("\nTime series dataset loaded successfully!");
67
68 Ok(())
69}

7fn main() {
8 // Check if a CSV file is provided as a command-line argument
9 let args: Vec<String> = env::args().collect();
10 if args.len() < 2 {
11 println!("Usage: {} <path_to_csv_file>", args[0]);
12 println!("Example: {} examples/sampledata.csv", args[0]);
13 return;
14 }
15
16 let filepath = &args[1];
17
18 // Verify the file exists
19 if !Path::new(filepath).exists() {
20 println!("Error: File '{filepath}' does not exist");
21 return;
22 }
23
24 // Load CSV file
25 println!("Loading CSV file: {filepath}");
26 let csv_config = loaders::CsvConfig {
27 has_header: true,
28 target_column: None,
29 ..Default::default()
30 };
31 match loaders::load_csv(filepath, csv_config) {
32 Ok(dataset) => {
33 print_dataset_info(&dataset, "Loaded CSV");
34
35 // Split the dataset for demonstration
36 println!("\nDemonstrating train-test split...");
37 match train_test_split(&dataset, 0.2, Some(42)) {
38 Ok((train, test)) => {
39 println!("Training set: {} samples", train.n_samples());
40 println!("Test set: {} samples", test.n_samples());
41
42 // Save as JSON for demonstration
43 let jsonpath = format!("{filepath}.json");
44 println!("\nSaving training dataset to JSON: {jsonpath}");
45 if let Err(e) = loaders::save_json(&train, &jsonpath) {
46 println!("Error saving JSON: {e}");
47 } else {
48 println!("Successfully saved JSON file");
49
50 // Load back the JSON file
51 println!("\nLoading back from JSON file...");
52 match loaders::load_json(&jsonpath) {
53 Ok(loaded) => {
54 print_dataset_info(&loaded, "Loaded JSON");
55 }
56 Err(e) => println!("Error loading JSON: {e}"),
57 }
58 }
59 }
60 Err(e) => println!("Error splitting dataset: {e}"),
61 }
62 }
63 Err(e) => println!("Error loading CSV: {e}"),
64 }
65}
66
67#[allow(dead_code)]
68fn print_dataset_info(dataset: &Dataset, name: &str) {
69 println!("=== {name} Dataset ===");
70 println!("Number of samples: {}", dataset.n_samples());
71 println!("Number of features: {}", dataset.n_features());
72
73 if let Some(featurenames) = &dataset.featurenames {
74 println!(
75 "Features: {:?}",
76 &featurenames[0..std::cmp::min(5, featurenames.len())]
77 );
78 if featurenames.len() > 5 {
79 println!("... and {} more", featurenames.len() - 5);
80 }
81 }
82
83 if let Some(target) = &dataset.target {
84 println!("Target shape: {}", target.len());
85
86 if let Some(targetnames) = &dataset.targetnames {
87 println!("Target classes: {targetnames:?}");
88 }
89 }
90
91 for (key, value) in &dataset.metadata {
92 println!("Metadata - {key}: {value}");
93 }
94}

Source
pub fn n_features(&self) -> usize
pub fn n_features(&self) -> usize
Examples found in repository?
4fn main() -> Result<(), Box<dyn std::error::Error>> {
5 let iris = load_iris()?;
6 println!("Iris dataset loaded:");
7 println!(" Samples: {}", iris.n_samples());
8 println!(" Features: {}", iris.n_features());
9 println!(
10 " Target classes: {}",
11 iris.targetnames.as_ref().map_or(0, |v| v.len())
12 );
13
14 let boston = load_boston()?;
15 println!("\nBoston Housing dataset loaded:");
16 println!(" Samples: {}", boston.n_samples());
17 println!(" Features: {}", boston.n_features());
18
19 Ok(())
20}

More examples
30fn main() -> Result<(), Box<dyn std::error::Error>> {
31 println!("🚀 SciRS2-Datasets Advanced Mode Showcase");
32 println!("===========================================\n");
33
34 // Create a sample dataset for demonstration
35 let dataset = create_sampledataset()?;
36 println!(
37 "📊 Created sample dataset: {} samples, {} features",
38 dataset.n_samples(),
39 dataset.n_features()
40 );
41
42 // Demonstrate advanced analytics
43 demonstrate_advanced_analytics(&dataset)?;
44
45 // Demonstrate advanced-GPU optimization
46 demonstrate_advanced_gpu_optimization()?;
47
48 // Demonstrate adaptive streaming
49 demonstrate_adaptive_streaming(&dataset)?;
50
51 println!("\n✅ Advanced mode demonstration completed successfully!");
52 Ok(())
53}

4fn main() -> Result<(), Box<dyn std::error::Error>> {
5 // Load a CSV file with headers and target column
6 let dataset = load_csv_legacy(
7 "scirs2-datasets/data/example.csv",
8 true, // has header
9 Some(3), // target column index (0-based)
10 )?;
11
12 println!("CSV dataset loaded successfully:");
13 println!(" Samples: {}", dataset.n_samples());
14 println!(" Features: {}", dataset.n_features());
15 println!(" Feature names: {:?}", dataset.featurenames);
16
17 // Access data and target
18 println!("\nFirst 3 samples:");
19 for i in 0..3 {
20 let features = dataset.data.row(i);
21 let target = dataset.target.as_ref().map(|t| t[i]);
22 println!(" Sample {i}: Features = {features:?}, Target = {target:?}");
23 }
24
25 Ok(())
26}

68fn print_dataset_info(dataset: &Dataset, name: &str) {
69 println!("=== {name} Dataset ===");
70 println!("Number of samples: {}", dataset.n_samples());
71 println!("Number of features: {}", dataset.n_features());
72
73 if let Some(featurenames) = &dataset.featurenames {
74 println!(
75 "Features: {:?}",
76 &featurenames[0..std::cmp::min(5, featurenames.len())]
77 );
78 if featurenames.len() > 5 {
79 println!("... and {} more", featurenames.len() - 5);
80 }
81 }
82
83 if let Some(target) = &dataset.target {
84 println!("Target shape: {}", target.len());
85
86 if let Some(targetnames) = &dataset.targetnames {
87 println!("Target classes: {targetnames:?}");
88 }
89 }
90
91 for (key, value) in &dataset.metadata {
92 println!("Metadata - {key}: {value}");
93 }
94}

184fn print_dataset_summary(dataset: &scirs2_datasets::Dataset, name: &str) {
185 let n_classes = if let Some(target) = &dataset.target {
186 let unique_labels: std::collections::HashSet<_> =
187 target.iter().map(|&x| x as i32).collect();
188 unique_labels.len()
189 } else {
190 0
191 };
192
193 let class_info = if n_classes > 0 {
194 format!(", {n_classes} classes")
195 } else {
196 " (unsupervised)".to_string()
197 };
198
199 println!(
200 " {}: {} samples, {} features{}",
201 name,
202 dataset.n_samples(),
203 dataset.n_features(),
204 class_info
205 );
206
207 // Print first few data points for small _datasets
208 if dataset.n_samples() <= 10 && dataset.n_features() <= 3 {
209 println!(" Sample points:");
210 for i in 0..dataset.n_samples().min(3) {
211 let point: Vec<f64> = (0..dataset.n_features())
212 .map(|j| dataset.data[[i, j]])
213 .collect();
214 println!(
215 " [{:.3}, {:.3}{}]",
216 point[0],
217 point[1],
218 if point.len() > 2 {
219 format!(", {:.3}", point[2])
220 } else {
221 "".to_string()
222 }
223 );
224 }
225 }
226}

196fn demonstrate_comprehensive_corruption() {
197 println!("Testing comprehensive dataset corruption:");
198
199 // Load a real dataset
200 let iris = load_iris().unwrap();
201 println!(
202 "Original Iris dataset: {} samples, {} features",
203 iris.n_samples(),
204 iris.n_features()
205 );
206
207 let original_stats = calculate_basic_stats(&iris.data);
208 println!(
209 "Original stats - Mean: {:.3}, Std: {:.3}",
210 original_stats.0, original_stats.1
211 );
212
213 // Create different levels of corruption
214 let corruption_levels = [
215 (0.05, 0.02, "Light corruption"),
216 (0.1, 0.05, "Moderate corruption"),
217 (0.2, 0.1, "Heavy corruption"),
218 (0.3, 0.15, "Severe corruption"),
219 ];
220
221 for (missing_rate, outlier_rate, description) in corruption_levels {
222 let corrupted = make_corrupted_dataset(
223 &iris,
224 missing_rate,
225 MissingPattern::MAR, // More realistic than MCAR
226 outlier_rate,
227 OutlierType::Point,
228 2.5,
229 Some(42),
230 )
231 .unwrap();
232
233 // Calculate how much data is usable
234 let total_elements = corrupted.data.len();
235 let missing_elements = corrupted.data.iter().filter(|&&x| x.is_nan()).count();
236 let usable_percentage =
237 ((total_elements - missing_elements) as f64 / total_elements as f64) * 100.0;
238
239 println!("{description}:");
240 println!(" Missing data: {:.1}%", missing_rate * 100.0);
241 println!(" Outliers: {:.1}%", outlier_rate * 100.0);
242 println!(" Usable data: {:.1}%", usable_percentage);
243
244 // Show metadata
245 if let Some(missing_count) = corrupted.metadata.get("missing_count") {
246 println!(" Actual missing: {missing_count} elements");
247 }
248 if let Some(outlier_count) = corrupted.metadata.get("outlier_count") {
249 println!(" Actual outliers: {outlier_count} samples");
250 }
251 }
252}

Sourcepub fn has_target(&self) -> bool
pub fn has_target(&self) -> bool
Check whether the dataset has target values
§Returns
true if the dataset contains a target array, false otherwise
Sourcepub fn featurenames(&self) -> Option<&Vec<String>>
pub fn featurenames(&self) -> Option<&Vec<String>>
Get a reference to the feature names if available
§Returns
Optional reference to feature names vector
Examples found in repository?
102fn demonstrate_classification_datasets() -> Result<(), Box<dyn std::error::Error>> {
103 println!("🎯 CLASSIFICATION DATASETS");
104 println!("{}", "-".repeat(40));
105
106 // Titanic dataset
107 println!("Loading Titanic dataset...");
108 let titanic = load_titanic()?;
109
110 println!("Titanic Dataset:");
111 println!(
112 " Description: {}",
113 titanic
114 .metadata
115 .get("description")
116 .unwrap_or(&"Unknown".to_string())
117 );
118 println!(" Samples: {}", titanic.n_samples());
119 println!(" Features: {}", titanic.n_features());
120
121 if let Some(featurenames) = titanic.featurenames() {
122 println!(" Features: {featurenames:?}");
123 }
124
125 if let Some(targetnames) = titanic.targetnames() {
126 println!(" Classes: {targetnames:?}");
127 }
128
129 // Analyze class distribution
130 if let Some(target) = &titanic.target {
131 let mut class_counts = HashMap::new();
132 for &class in target.iter() {
133 *class_counts.entry(class as i32).or_insert(0) += 1;
134 }
135 println!(" Class distribution: {class_counts:?}");
136
137 // Calculate survival rate
138 let survived = class_counts.get(&1).unwrap_or(&0);
139 let total = titanic.n_samples();
140 println!(
141 " Survival rate: {:.1}%",
142 (*survived as f64 / total as f64) * 100.0
143 );
144 }
145
146 // Demonstrate train/test split
147 let (train, test) = train_test_split(&titanic, 0.2, Some(42))?;
148 println!(
149 " Train/test split: {} train, {} test",
150 train.n_samples(),
151 test.n_samples()
152 );
153
154 // Adult (Census Income) dataset
155 println!("\nLoading Adult (Census Income) dataset...");
156 match load_adult() {
157 Ok(adult) => {
158 println!("Adult Dataset:");
159 println!(
160 " Description: {}",
161 adult
162 .metadata
163 .get("description")
164 .unwrap_or(&"Unknown".to_string())
165 );
166 println!(" Samples: {}", adult.n_samples());
167 println!(" Features: {}", adult.n_features());
168 println!(" Task: Predict income >$50K based on census data");
169 }
170 Err(e) => {
171 println!(" Note: Adult dataset requires download: {e}");
172 println!(" This is expected for the demonstration");
173 }
174 }
175
176 println!();
177 Ok(())
178}
179
180#[allow(dead_code)]
181fn demonstrate_regression_datasets() -> Result<(), Box<dyn std::error::Error>> {
182 println!("📈 REGRESSION DATASETS");
183 println!("{}", "-".repeat(40));
184
185 // California Housing dataset
186 println!("Loading California Housing dataset...");
187 let housing = load_california_housing()?;
188
189 println!("California Housing Dataset:");
190 println!(
191 " Description: {}",
192 housing
193 .metadata
194 .get("description")
195 .unwrap_or(&"Unknown".to_string())
196 );
197 println!(" Samples: {}", housing.n_samples());
198 println!(" Features: {}", housing.n_features());
199
200 if let Some(featurenames) = housing.featurenames() {
201 println!(" Features: {featurenames:?}");
202 }
203
204 // Analyze target distribution
205 if let Some(target) = &housing.target {
206 let mean = target.mean().unwrap_or(0.0);
207 let std = target.std(0.0);
208 let min = target.iter().fold(f64::INFINITY, |a, &b| a.min(b));
209 let max = target.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
210
211 println!(" Target (house value) statistics:");
212 println!(" Mean: {mean:.2} (hundreds of thousands)");
213 println!(" Std: {std:.2}");
214 println!(" Range: [{min:.2}, {max:.2}]");
215 }
216
217 // Red Wine Quality dataset
218 println!("\nLoading Red Wine Quality dataset...");
219 let wine = load_red_wine_quality()?;
220
221 println!("Red Wine Quality Dataset:");
222 println!(
223 " Description: {}",
224 wine.metadata
225 .get("description")
226 .unwrap_or(&"Unknown".to_string())
227 );
228 println!(" Samples: {}", wine.n_samples());
229 println!(" Features: {}", wine.n_features());
230
231 if let Some(target) = &wine.target {
232 let mean_quality = target.mean().unwrap_or(0.0);
233 println!(" Average wine quality: {mean_quality:.1}/10");
234
235 // Quality distribution
236 let mut quality_counts = HashMap::new();
237 for &quality in target.iter() {
238 let q = quality.round() as i32;
239 *quality_counts.entry(q).or_insert(0) += 1;
240 }
241 println!(" Quality distribution: {quality_counts:?}");
242 }
243
244 println!();
245 Ok(())
246}
247
248#[allow(dead_code)]
249fn demonstrate_healthcare_datasets() -> Result<(), Box<dyn std::error::Error>> {
250 println!("🏥 HEALTHCARE DATASETS");
251 println!("{}", "-".repeat(40));
252
253 // Heart Disease dataset
254 println!("Loading Heart Disease dataset...");
255 let heart = load_heart_disease()?;
256
257 println!("Heart Disease Dataset:");
258 println!(
259 " Description: {}",
260 heart
261 .metadata
262 .get("description")
263 .unwrap_or(&"Unknown".to_string())
264 );
265 println!(" Samples: {}", heart.n_samples());
266 println!(" Features: {}", heart.n_features());
267
268 if let Some(featurenames) = heart.featurenames() {
269 println!(" Clinical features: {:?}", &featurenames[..5]); // Show first 5
270 println!(" ... and {} more features", featurenames.len() - 5);
271 }
272
273 // Analyze risk factors
274 if let Some(target) = &heart.target {
275 let mut disease_counts = HashMap::new();
276 for &disease in target.iter() {
277 *disease_counts.entry(disease as i32).or_insert(0) += 1;
278 }
279
280 let with_disease = disease_counts.get(&1).unwrap_or(&0);
281 let total = heart.n_samples();
282 println!(
283 " Disease prevalence: {:.1}% ({}/{})",
284 (*with_disease as f64 / total as f64) * 100.0,
285 with_disease,
286 total
287 );
288 }
289
290 // Demonstrate feature analysis
291 println!(" Sample clinical parameter ranges:");
292 let age_col = heart.data.column(0);
293 let age_mean = age_col.mean();
294 let age_std = age_col.std(0.0);
295 println!(" Age: {age_mean:.1} ± {age_std:.1} years");
296
297 println!();
298 Ok(())
299}Sourcepub fn targetnames(&self) -> Option<&Vec<String>>
pub fn targetnames(&self) -> Option<&Vec<String>>
Get a reference to the target names if available
§Returns
Optional reference to target names vector
Examples found in repository?
102fn demonstrate_classification_datasets() -> Result<(), Box<dyn std::error::Error>> {
103 println!("🎯 CLASSIFICATION DATASETS");
104 println!("{}", "-".repeat(40));
105
106 // Titanic dataset
107 println!("Loading Titanic dataset...");
108 let titanic = load_titanic()?;
109
110 println!("Titanic Dataset:");
111 println!(
112 " Description: {}",
113 titanic
114 .metadata
115 .get("description")
116 .unwrap_or(&"Unknown".to_string())
117 );
118 println!(" Samples: {}", titanic.n_samples());
119 println!(" Features: {}", titanic.n_features());
120
121 if let Some(featurenames) = titanic.featurenames() {
122 println!(" Features: {featurenames:?}");
123 }
124
125 if let Some(targetnames) = titanic.targetnames() {
126 println!(" Classes: {targetnames:?}");
127 }
128
129 // Analyze class distribution
130 if let Some(target) = &titanic.target {
131 let mut class_counts = HashMap::new();
132 for &class in target.iter() {
133 *class_counts.entry(class as i32).or_insert(0) += 1;
134 }
135 println!(" Class distribution: {class_counts:?}");
136
137 // Calculate survival rate
138 let survived = class_counts.get(&1).unwrap_or(&0);
139 let total = titanic.n_samples();
140 println!(
141 " Survival rate: {:.1}%",
142 (*survived as f64 / total as f64) * 100.0
143 );
144 }
145
146 // Demonstrate train/test split
147 let (train, test) = train_test_split(&titanic, 0.2, Some(42))?;
148 println!(
149 " Train/test split: {} train, {} test",
150 train.n_samples(),
151 test.n_samples()
152 );
153
154 // Adult (Census Income) dataset
155 println!("\nLoading Adult (Census Income) dataset...");
156 match load_adult() {
157 Ok(adult) => {
158 println!("Adult Dataset:");
159 println!(
160 " Description: {}",
161 adult
162 .metadata
163 .get("description")
164 .unwrap_or(&"Unknown".to_string())
165 );
166 println!(" Samples: {}", adult.n_samples());
167 println!(" Features: {}", adult.n_features());
168 println!(" Task: Predict income >$50K based on census data");
169 }
170 Err(e) => {
171 println!(" Note: Adult dataset requires download: {e}");
172 println!(" This is expected for the demonstration");
173 }
174 }
175
176 println!();
177 Ok(())
178}Sourcepub fn description(&self) -> Option<&String>
pub fn description(&self) -> Option<&String>
Get a reference to the dataset description if available
§Returns
Optional reference to dataset description
Sourcepub fn set_metadata(&mut self, key: &str, value: &str)
pub fn set_metadata(&mut self, key: &str, value: &str)
Set a metadata key-value pair on the dataset
Trait Implementations§
Source§impl<'de> Deserialize<'de> for Dataset
impl<'de> Deserialize<'de> for Dataset
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for Dataset
impl RefUnwindSafe for Dataset
impl Send for Dataset
impl Sync for Dataset
impl Unpin for Dataset
impl UnwindSafe for Dataset
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> Pointable for T
impl<T> Pointable for T
Source§impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
Source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its
superset. Read more
Source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
self is actually part of its subset T (and can be converted to it).Source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.