/// Represents a dataset with features, optional targets, and metadata.
///
/// Built via `Dataset::new` plus the chainable `with_*` builder methods
/// (e.g. `with_feature_names`, `with_metadata`); see the impl block.
pub struct Dataset {
/// Features/data matrix with shape (n_samples, n_features).
pub data: Array2<f64>,
/// Optional target values, one per sample.
pub target: Option<Array1<f64>>,
/// Optional target class names for classification problems.
pub target_names: Option<Vec<String>>,
/// Optional names for each feature column.
pub feature_names: Option<Vec<String>>,
/// Optional human-readable descriptions, one per feature.
pub feature_descriptions: Option<Vec<String>>,
/// Optional free-form description of the dataset.
pub description: Option<String>,
/// Dataset metadata as string key-value pairs (may be empty).
pub metadata: HashMap<String, String>,
}
Expand description
Represents a dataset with features, optional targets, and metadata
Fields§
§data: Array2<f64>
Features/data matrix (n_samples, n_features)
target: Option<Array1<f64>>
Optional target values
target_names: Option<Vec<String>>
Optional target names for classification problems
feature_names: Option<Vec<String>>
Optional feature names
feature_descriptions: Option<Vec<String>>
Optional descriptions for each feature
description: Option<String>
Optional dataset description
metadata: HashMap<String, String>
Dataset metadata as string key-value pairs (the map may be empty; unlike the other fields, it is not an `Option`)
Implementations§
Source§impl Dataset
impl Dataset
Sourcepub fn new(data: Array2<f64>, target: Option<Array1<f64>>) -> Self
pub fn new(data: Array2<f64>, target: Option<Array1<f64>>) -> Self
Create a new dataset with the given data and target
Sourcepub fn with_target_names(self, target_names: Vec<String>) -> Self
pub fn with_target_names(self, target_names: Vec<String>) -> Self
Add target names to the dataset
Sourcepub fn with_feature_names(self, feature_names: Vec<String>) -> Self
pub fn with_feature_names(self, feature_names: Vec<String>) -> Self
Add feature names to the dataset
Sourcepub fn with_feature_descriptions(
self,
feature_descriptions: Vec<String>,
) -> Self
pub fn with_feature_descriptions( self, feature_descriptions: Vec<String>, ) -> Self
Add feature descriptions to the dataset
Sourcepub fn with_description(self, description: String) -> Self
pub fn with_description(self, description: String) -> Self
Add a description to the dataset
Sourcepub fn with_metadata(self, key: &str, value: &str) -> Self
pub fn with_metadata(self, key: &str, value: &str) -> Self
Add metadata to the dataset
Sourcepub fn n_samples(&self) -> usize
pub fn n_samples(&self) -> usize
Get the number of samples in the dataset
Examples found in repository?
examples/toy_datasets.rs (line 6)
3fn main() -> Result<(), Box<dyn std::error::Error>> {
4 let iris = load_iris()?;
5 println!("Iris dataset loaded:");
6 println!(" Samples: {}", iris.n_samples());
7 println!(" Features: {}", iris.n_features());
8 println!(
9 " Target classes: {}",
10 iris.target_names.as_ref().map_or(0, |v| v.len())
11 );
12
13 let boston = load_boston()?;
14 println!("\nBoston Housing dataset loaded:");
15 println!(" Samples: {}", boston.n_samples());
16 println!(" Features: {}", boston.n_features());
17
18 Ok(())
19}
More examples
examples/csv_loading.rs (line 12)
3fn main() -> Result<(), Box<dyn std::error::Error>> {
4 // Load a CSV file with headers and target column
5 let dataset = load_csv(
6 "scirs2-datasets/data/example.csv",
7 true, // has header
8 Some(3), // target column index (0-based)
9 )?;
10
11 println!("CSV dataset loaded successfully:");
12 println!(" Samples: {}", dataset.n_samples());
13 println!(" Features: {}", dataset.n_features());
14 println!(" Feature names: {:?}", dataset.feature_names);
15
16 // Access data and target
17 println!("\nFirst 3 samples:");
18 for i in 0..3 {
19 let features = dataset.data.row(i);
20 let target = dataset.target.as_ref().map(|t| t[i]);
21 println!(
22 " Sample {}: Features = {:?}, Target = {:?}",
23 i, features, target
24 );
25 }
26
27 Ok(())
28}
examples/dataset_loaders.rs (line 33)
6fn main() {
7 // Check if a CSV file is provided as a command-line argument
8 let args: Vec<String> = env::args().collect();
9 if args.len() < 2 {
10 println!("Usage: {} <path_to_csv_file>", args[0]);
11 println!("Example: {} examples/sample_data.csv", args[0]);
12 return;
13 }
14
15 let file_path = &args[1];
16
17 // Verify the file exists
18 if !Path::new(file_path).exists() {
19 println!("Error: File '{}' does not exist", file_path);
20 return;
21 }
22
23 // Load CSV file
24 println!("Loading CSV file: {}", file_path);
25 match loaders::load_csv(file_path, true, None) {
26 Ok(dataset) => {
27 print_dataset_info(&dataset, "Loaded CSV");
28
29 // Split the dataset for demonstration
30 println!("\nDemonstrating train-test split...");
31 match dataset.train_test_split(0.2, Some(42)) {
32 Ok((train, test)) => {
33 println!("Training set: {} samples", train.n_samples());
34 println!("Test set: {} samples", test.n_samples());
35
36 // Save as JSON for demonstration
37 let json_path = format!("{}.json", file_path);
38 println!("\nSaving training dataset to JSON: {}", json_path);
39 if let Err(e) = loaders::save_json(&train, &json_path) {
40 println!("Error saving JSON: {}", e);
41 } else {
42 println!("Successfully saved JSON file");
43
44 // Load back the JSON file
45 println!("\nLoading back from JSON file...");
46 match loaders::load_json(&json_path) {
47 Ok(loaded) => {
48 print_dataset_info(&loaded, "Loaded JSON");
49 }
50 Err(e) => println!("Error loading JSON: {}", e),
51 }
52 }
53 }
54 Err(e) => println!("Error splitting dataset: {}", e),
55 }
56 }
57 Err(e) => println!("Error loading CSV: {}", e),
58 }
59}
60
61fn print_dataset_info(dataset: &Dataset, name: &str) {
62 println!("=== {} Dataset ===", name);
63 println!("Number of samples: {}", dataset.n_samples());
64 println!("Number of features: {}", dataset.n_features());
65
66 if let Some(feature_names) = &dataset.feature_names {
67 println!(
68 "Features: {:?}",
69 &feature_names[0..std::cmp::min(5, feature_names.len())]
70 );
71 if feature_names.len() > 5 {
72 println!("... and {} more", feature_names.len() - 5);
73 }
74 }
75
76 if let Some(target) = &dataset.target {
77 println!("Target shape: {}", target.len());
78
79 if let Some(target_names) = &dataset.target_names {
80 println!("Target classes: {:?}", target_names);
81 }
82 }
83
84 for (key, value) in &dataset.metadata {
85 println!("Metadata - {}: {}", key, value);
86 }
87}
examples/data_generators.rs (line 25)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("Creating synthetic datasets...\n");
7
8 // Generate classification dataset
9 let n_samples = 100;
10 let n_features = 5;
11
12 let classification_data = make_classification(
13 n_samples,
14 n_features,
15 3, // 3 classes
16 2, // 2 clusters per class
17 3, // 3 informative features
18 Some(42), // random seed
19 )?;
20
21 // Train-test split
22 let (train, test) = classification_data.train_test_split(0.2, Some(42))?;
23
24 println!("Classification dataset:");
25 println!(" Total samples: {}", classification_data.n_samples());
26 println!(" Features: {}", classification_data.n_features());
27 println!(" Training samples: {}", train.n_samples());
28 println!(" Test samples: {}", test.n_samples());
29
30 // Generate regression dataset
31 let regression_data = make_regression(
32 n_samples,
33 n_features,
34 3, // 3 informative features
35 0.5, // noise level
36 Some(42),
37 )?;
38
39 println!("\nRegression dataset:");
40 println!(" Samples: {}", regression_data.n_samples());
41 println!(" Features: {}", regression_data.n_features());
42
43 // Normalize the data (in-place)
44 let mut data_copy = regression_data.data.clone();
45 normalize(&mut data_copy);
46 println!(" Data normalized successfully");
47
48 // Generate clustering data (blobs)
49 let clustering_data = make_blobs(
50 n_samples,
51 2, // 2 features for easy visualization
52 4, // 4 clusters
53 0.8, // cluster standard deviation
54 Some(42),
55 )?;
56
57 println!("\nClustering dataset (blobs):");
58 println!(" Samples: {}", clustering_data.n_samples());
59 println!(" Features: {}", clustering_data.n_features());
60
61 // Find the number of clusters by finding the max value of target
62 let num_clusters = clustering_data.target.as_ref().map_or(0, |t| {
63 let mut max_val = -1.0;
64 for &val in t.iter() {
65 if val > max_val {
66 max_val = val;
67 }
68 }
69 (max_val as usize) + 1
70 });
71
72 println!(" Clusters: {}", num_clusters);
73
74 // Generate time series data
75 let time_series = make_time_series(
76 100, // 100 time steps
77 3, // 3 features/variables
78 true, // with trend
79 true, // with seasonality
80 0.2, // noise level
81 Some(42),
82 )?;
83
84 println!("\nTime series dataset:");
85 println!(" Time steps: {}", time_series.n_samples());
86 println!(" Features: {}", time_series.n_features());
87
88 Ok(())
89}
Sourcepub fn n_features(&self) -> usize
pub fn n_features(&self) -> usize
Get the number of features in the dataset
Examples found in repository?
examples/toy_datasets.rs (line 7)
3fn main() -> Result<(), Box<dyn std::error::Error>> {
4 let iris = load_iris()?;
5 println!("Iris dataset loaded:");
6 println!(" Samples: {}", iris.n_samples());
7 println!(" Features: {}", iris.n_features());
8 println!(
9 " Target classes: {}",
10 iris.target_names.as_ref().map_or(0, |v| v.len())
11 );
12
13 let boston = load_boston()?;
14 println!("\nBoston Housing dataset loaded:");
15 println!(" Samples: {}", boston.n_samples());
16 println!(" Features: {}", boston.n_features());
17
18 Ok(())
19}
More examples
examples/csv_loading.rs (line 13)
3fn main() -> Result<(), Box<dyn std::error::Error>> {
4 // Load a CSV file with headers and target column
5 let dataset = load_csv(
6 "scirs2-datasets/data/example.csv",
7 true, // has header
8 Some(3), // target column index (0-based)
9 )?;
10
11 println!("CSV dataset loaded successfully:");
12 println!(" Samples: {}", dataset.n_samples());
13 println!(" Features: {}", dataset.n_features());
14 println!(" Feature names: {:?}", dataset.feature_names);
15
16 // Access data and target
17 println!("\nFirst 3 samples:");
18 for i in 0..3 {
19 let features = dataset.data.row(i);
20 let target = dataset.target.as_ref().map(|t| t[i]);
21 println!(
22 " Sample {}: Features = {:?}, Target = {:?}",
23 i, features, target
24 );
25 }
26
27 Ok(())
28}
examples/dataset_loaders.rs (line 64)
61fn print_dataset_info(dataset: &Dataset, name: &str) {
62 println!("=== {} Dataset ===", name);
63 println!("Number of samples: {}", dataset.n_samples());
64 println!("Number of features: {}", dataset.n_features());
65
66 if let Some(feature_names) = &dataset.feature_names {
67 println!(
68 "Features: {:?}",
69 &feature_names[0..std::cmp::min(5, feature_names.len())]
70 );
71 if feature_names.len() > 5 {
72 println!("... and {} more", feature_names.len() - 5);
73 }
74 }
75
76 if let Some(target) = &dataset.target {
77 println!("Target shape: {}", target.len());
78
79 if let Some(target_names) = &dataset.target_names {
80 println!("Target classes: {:?}", target_names);
81 }
82 }
83
84 for (key, value) in &dataset.metadata {
85 println!("Metadata - {}: {}", key, value);
86 }
87}
examples/data_generators.rs (line 26)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("Creating synthetic datasets...\n");
7
8 // Generate classification dataset
9 let n_samples = 100;
10 let n_features = 5;
11
12 let classification_data = make_classification(
13 n_samples,
14 n_features,
15 3, // 3 classes
16 2, // 2 clusters per class
17 3, // 3 informative features
18 Some(42), // random seed
19 )?;
20
21 // Train-test split
22 let (train, test) = classification_data.train_test_split(0.2, Some(42))?;
23
24 println!("Classification dataset:");
25 println!(" Total samples: {}", classification_data.n_samples());
26 println!(" Features: {}", classification_data.n_features());
27 println!(" Training samples: {}", train.n_samples());
28 println!(" Test samples: {}", test.n_samples());
29
30 // Generate regression dataset
31 let regression_data = make_regression(
32 n_samples,
33 n_features,
34 3, // 3 informative features
35 0.5, // noise level
36 Some(42),
37 )?;
38
39 println!("\nRegression dataset:");
40 println!(" Samples: {}", regression_data.n_samples());
41 println!(" Features: {}", regression_data.n_features());
42
43 // Normalize the data (in-place)
44 let mut data_copy = regression_data.data.clone();
45 normalize(&mut data_copy);
46 println!(" Data normalized successfully");
47
48 // Generate clustering data (blobs)
49 let clustering_data = make_blobs(
50 n_samples,
51 2, // 2 features for easy visualization
52 4, // 4 clusters
53 0.8, // cluster standard deviation
54 Some(42),
55 )?;
56
57 println!("\nClustering dataset (blobs):");
58 println!(" Samples: {}", clustering_data.n_samples());
59 println!(" Features: {}", clustering_data.n_features());
60
61 // Find the number of clusters by finding the max value of target
62 let num_clusters = clustering_data.target.as_ref().map_or(0, |t| {
63 let mut max_val = -1.0;
64 for &val in t.iter() {
65 if val > max_val {
66 max_val = val;
67 }
68 }
69 (max_val as usize) + 1
70 });
71
72 println!(" Clusters: {}", num_clusters);
73
74 // Generate time series data
75 let time_series = make_time_series(
76 100, // 100 time steps
77 3, // 3 features/variables
78 true, // with trend
79 true, // with seasonality
80 0.2, // noise level
81 Some(42),
82 )?;
83
84 println!("\nTime series dataset:");
85 println!(" Time steps: {}", time_series.n_samples());
86 println!(" Features: {}", time_series.n_features());
87
88 Ok(())
89}
Sourcepub fn train_test_split(
&self,
test_size: f64,
random_seed: Option<u64>,
) -> Result<(Dataset, Dataset)>
pub fn train_test_split( &self, test_size: f64, random_seed: Option<u64>, ) -> Result<(Dataset, Dataset)>
Split the dataset into training and test sets
Examples found in repository?
examples/dataset_loaders.rs (line 31)
6fn main() {
7 // Check if a CSV file is provided as a command-line argument
8 let args: Vec<String> = env::args().collect();
9 if args.len() < 2 {
10 println!("Usage: {} <path_to_csv_file>", args[0]);
11 println!("Example: {} examples/sample_data.csv", args[0]);
12 return;
13 }
14
15 let file_path = &args[1];
16
17 // Verify the file exists
18 if !Path::new(file_path).exists() {
19 println!("Error: File '{}' does not exist", file_path);
20 return;
21 }
22
23 // Load CSV file
24 println!("Loading CSV file: {}", file_path);
25 match loaders::load_csv(file_path, true, None) {
26 Ok(dataset) => {
27 print_dataset_info(&dataset, "Loaded CSV");
28
29 // Split the dataset for demonstration
30 println!("\nDemonstrating train-test split...");
31 match dataset.train_test_split(0.2, Some(42)) {
32 Ok((train, test)) => {
33 println!("Training set: {} samples", train.n_samples());
34 println!("Test set: {} samples", test.n_samples());
35
36 // Save as JSON for demonstration
37 let json_path = format!("{}.json", file_path);
38 println!("\nSaving training dataset to JSON: {}", json_path);
39 if let Err(e) = loaders::save_json(&train, &json_path) {
40 println!("Error saving JSON: {}", e);
41 } else {
42 println!("Successfully saved JSON file");
43
44 // Load back the JSON file
45 println!("\nLoading back from JSON file...");
46 match loaders::load_json(&json_path) {
47 Ok(loaded) => {
48 print_dataset_info(&loaded, "Loaded JSON");
49 }
50 Err(e) => println!("Error loading JSON: {}", e),
51 }
52 }
53 }
54 Err(e) => println!("Error splitting dataset: {}", e),
55 }
56 }
57 Err(e) => println!("Error loading CSV: {}", e),
58 }
59}
More examples
examples/data_generators.rs (line 22)
5fn main() -> Result<(), Box<dyn std::error::Error>> {
6 println!("Creating synthetic datasets...\n");
7
8 // Generate classification dataset
9 let n_samples = 100;
10 let n_features = 5;
11
12 let classification_data = make_classification(
13 n_samples,
14 n_features,
15 3, // 3 classes
16 2, // 2 clusters per class
17 3, // 3 informative features
18 Some(42), // random seed
19 )?;
20
21 // Train-test split
22 let (train, test) = classification_data.train_test_split(0.2, Some(42))?;
23
24 println!("Classification dataset:");
25 println!(" Total samples: {}", classification_data.n_samples());
26 println!(" Features: {}", classification_data.n_features());
27 println!(" Training samples: {}", train.n_samples());
28 println!(" Test samples: {}", test.n_samples());
29
30 // Generate regression dataset
31 let regression_data = make_regression(
32 n_samples,
33 n_features,
34 3, // 3 informative features
35 0.5, // noise level
36 Some(42),
37 )?;
38
39 println!("\nRegression dataset:");
40 println!(" Samples: {}", regression_data.n_samples());
41 println!(" Features: {}", regression_data.n_features());
42
43 // Normalize the data (in-place)
44 let mut data_copy = regression_data.data.clone();
45 normalize(&mut data_copy);
46 println!(" Data normalized successfully");
47
48 // Generate clustering data (blobs)
49 let clustering_data = make_blobs(
50 n_samples,
51 2, // 2 features for easy visualization
52 4, // 4 clusters
53 0.8, // cluster standard deviation
54 Some(42),
55 )?;
56
57 println!("\nClustering dataset (blobs):");
58 println!(" Samples: {}", clustering_data.n_samples());
59 println!(" Features: {}", clustering_data.n_features());
60
61 // Find the number of clusters by finding the max value of target
62 let num_clusters = clustering_data.target.as_ref().map_or(0, |t| {
63 let mut max_val = -1.0;
64 for &val in t.iter() {
65 if val > max_val {
66 max_val = val;
67 }
68 }
69 (max_val as usize) + 1
70 });
71
72 println!(" Clusters: {}", num_clusters);
73
74 // Generate time series data
75 let time_series = make_time_series(
76 100, // 100 time steps
77 3, // 3 features/variables
78 true, // with trend
79 true, // with seasonality
80 0.2, // noise level
81 Some(42),
82 )?;
83
84 println!("\nTime series dataset:");
85 println!(" Time steps: {}", time_series.n_samples());
86 println!(" Features: {}", time_series.n_features());
87
88 Ok(())
89}
Trait Implementations§
Source§impl<'de> Deserialize<'de> for Dataset
impl<'de> Deserialize<'de> for Dataset
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl Freeze for Dataset
impl RefUnwindSafe for Dataset
impl Send for Dataset
impl Sync for Dataset
impl Unpin for Dataset
impl UnwindSafe for Dataset
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more